1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2014 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
24 #include <algorithm>
26 #include "config.h"
27 #include "system.h"
28 #include "coretypes.h"
29 #include "tm.h"
30 #include "insn-config.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
35 #include "calls.h"
36 #include "varasm.h"
37 #include "flags.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "output.h"
45 #include "insn-attr.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "dwarf2.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "langhooks.h"
53 #include "basic-block.h"
54 #include "df.h"
55 #include "intl.h"
56 #include "sched-int.h"
57 #include "params.h"
58 #include "ggc.h"
59 #include "hash-table.h"
60 #include "tree-ssa-alias.h"
61 #include "internal-fn.h"
62 #include "gimple-fold.h"
63 #include "tree-eh.h"
64 #include "gimple-expr.h"
65 #include "is-a.h"
66 #include "gimple.h"
67 #include "gimplify.h"
68 #include "cfgloop.h"
69 #include "alloc-pool.h"
70 #include "tm-constrs.h"
71 #include "opts.h"
72 #include "tree-pass.h"
73 #include "pass_manager.h"
74 #include "context.h"
75 #include "builtins.h"
77 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
79 /* These are some macros to abstract register modes. */
80 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
81 && ((HOST_WIDE_INT)(VALUE)) <= 511)
83 #define CONST_OK_FOR_ADD(size) \
84 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
85 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
86 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
87 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
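/* A minimal usage sketch of the macros above (added for illustration; not
   part of the SH backend proper).  It emits "sp += delta", picking the
   SImode or DImode generator via GEN_ADD3 and falling back to a scratch
   register when the constant does not satisfy CONST_OK_FOR_ADD.  The choice
   of r3 as scratch is arbitrary here; output_stack_adjust, the real and more
   careful implementation, appears further below in this file.  */
static void ATTRIBUTE_UNUSED
sh_sketch_adjust_sp (HOST_WIDE_INT delta)
{
  if (CONST_OK_FOR_ADD (delta))
    emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                         GEN_INT (delta)));
  else
    {
      /* Constant out of range: load it into a scratch register first.  */
      rtx tmp = gen_rtx_REG (Pmode, 3);
      emit_insn (GEN_MOV (tmp, GEN_INT (delta)));
      emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
    }
}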
89 /* Used to simplify the logic below. Find the attributes wherever
90 they may be. */
91 #define SH_ATTRIBUTES(decl) \
92 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
93 : DECL_ATTRIBUTES (decl) \
94 ? (DECL_ATTRIBUTES (decl)) \
95 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
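/* Illustrative only: SH_ATTRIBUTES lets the same attribute lookup work
   whether NODE is a type or a decl.  A hypothetical predicate using it could
   look like this; the real checks in this file (e.g. for the "renesas"
   attribute) are written along the same lines.  */
static bool ATTRIBUTE_UNUSED
sh_sketch_has_renesas_attribute (tree node)
{
  return lookup_attribute ("renesas", SH_ATTRIBUTES (node)) != NULL_TREE;
}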
97 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
98 int current_function_interrupt;
100 tree sh_deferred_function_attributes;
101 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* Number of r0 life regions. */
117 static int r0_life_regions;
119 /* If true, skip cycles for Q -> R movement. */
120 static int skip_cycles = 0;
122 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
123 and returned from sh_reorder2. */
124 static short cached_can_issue_more;
126 /* Unique number for UNSPEC_BBR pattern. */
127 static unsigned int unspec_bbr_uid = 1;
129 /* Provides the class number of the smallest class containing
130 reg number. */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS, GENERAL_REGS,
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 int assembler_dialect;
183 static bool shmedia_space_reserved_for_target_registers;
185 static void split_branches (rtx);
186 static int branch_dest (rtx);
187 static void print_slot (rtx);
188 static rtx add_constant (rtx, enum machine_mode, rtx);
189 static void dump_table (rtx, rtx);
190 static bool broken_move (rtx);
191 static bool mova_p (rtx);
192 static rtx find_barrier (int, rtx, rtx);
193 static bool noncall_uses_reg (rtx, rtx, rtx *);
194 static rtx gen_block_redirect (rtx, int, int);
195 static void sh_reorg (void);
196 static void sh_option_override (void);
197 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
198 static rtx_insn *frame_insn (rtx);
199 static rtx push (int);
200 static void pop (int);
201 static void push_regs (HARD_REG_SET *, int);
202 static int calc_live_regs (HARD_REG_SET *);
203 static HOST_WIDE_INT rounded_frame_size (int);
204 static bool sh_frame_pointer_required (void);
205 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
206 static int sh_mode_needed (int, rtx);
207 static int sh_mode_after (int, int, rtx);
208 static int sh_mode_entry (int);
209 static int sh_mode_exit (int);
210 static int sh_mode_priority (int entity, int n);
212 static rtx mark_constant_pool_use (rtx);
213 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
214 int, bool *);
215 static tree sh_handle_resbank_handler_attribute (tree *, tree,
216 tree, int, bool *);
217 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
218 tree, int, bool *);
219 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
220 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
221 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
222 static void sh_print_operand (FILE *, rtx, int);
223 static void sh_print_operand_address (FILE *, rtx);
224 static bool sh_print_operand_punct_valid_p (unsigned char code);
225 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
226 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
227 static void sh_insert_attributes (tree, tree *);
228 static const char *sh_check_pch_target_flags (int);
229 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
230 static int sh_adjust_cost (rtx, rtx, rtx, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (basic_block, enum machine_mode);
236 static int find_r0_life_regions (basic_block);
237 static void sh_md_init_global (FILE *, int, int);
238 static void sh_md_finish_global (FILE *, int);
239 static int rank_for_reorder (const void *, const void *);
240 static void swap_reorder (rtx *, int);
241 static void ready_reorder (rtx *, int);
242 static bool high_pressure (enum machine_mode);
243 static int sh_reorder (FILE *, int, rtx *, int *, int);
244 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
245 static void sh_md_init (FILE *, int, int);
246 static int sh_variable_issue (FILE *, int, rtx, int);
248 static bool sh_function_ok_for_sibcall (tree, tree);
250 static bool sh_cannot_modify_jumps_p (void);
251 static reg_class_t sh_target_reg_class (void);
252 static bool sh_optimize_target_register_callee_saved (bool);
253 static bool sh_ms_bitfield_layout_p (const_tree);
255 static void sh_init_builtins (void);
256 static tree sh_builtin_decl (unsigned, bool);
257 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
258 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
259 HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static bool flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, const_rtx, void *);
263 static int shiftcosts (rtx);
264 static int and_xor_ior_costs (rtx, int);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx);
269 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
270 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
271 static int sh_pr_n_sets (void);
272 static rtx sh_allocate_initial_value (rtx);
273 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
274 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
275 enum machine_mode,
276 struct secondary_reload_info *);
277 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
278 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
279 static rtx sh_delegitimize_address (rtx);
280 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
281 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
282 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
283 static int scavenge_reg (HARD_REG_SET *s);
284 struct save_schedule_s;
285 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
286 struct save_schedule_s *, int);
288 static rtx sh_struct_value_rtx (tree, int);
289 static rtx sh_function_value (const_tree, const_tree, bool);
290 static bool sh_function_value_regno_p (const unsigned int);
291 static rtx sh_libcall_value (enum machine_mode, const_rtx);
292 static bool sh_return_in_memory (const_tree, const_tree);
293 static rtx sh_builtin_saveregs (void);
294 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
295 tree, int *, int);
296 static bool sh_strict_argument_naming (cumulative_args_t);
297 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
298 static tree sh_build_builtin_va_list (void);
299 static void sh_va_start (tree, rtx);
300 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
301 static bool sh_promote_prototypes (const_tree);
302 static enum machine_mode sh_promote_function_mode (const_tree type,
303 enum machine_mode,
304 int *punsignedp,
305 const_tree funtype,
306 int for_return);
307 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
308 const_tree, bool);
309 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
310 const_tree, bool);
311 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
312 tree, bool);
313 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
314 const_tree, bool);
315 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
316 const_tree, bool);
317 static bool sh_scalar_mode_supported_p (enum machine_mode);
318 static int sh_dwarf_calling_convention (const_tree);
319 static void sh_encode_section_info (tree, rtx, int);
320 static bool sh2a_function_vector_p (tree);
321 static void sh_trampoline_init (rtx, tree, rtx);
322 static rtx sh_trampoline_adjust_address (rtx);
323 static void sh_conditional_register_usage (void);
324 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
325 static int mov_insn_size (enum machine_mode, bool);
326 static int mov_insn_alignment_mask (enum machine_mode, bool);
327 static bool sequence_insn_p (rtx);
328 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
329 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
330 enum machine_mode, bool);
331 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
333 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
335 static const struct attribute_spec sh_attribute_table[] =
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
338 affects_type_identity } */
339 { "interrupt_handler", 0, 0, true, false, false,
340 sh_handle_interrupt_handler_attribute, false },
341 { "sp_switch", 1, 1, true, false, false,
342 sh_handle_sp_switch_attribute, false },
343 { "trap_exit", 1, 1, true, false, false,
344 sh_handle_trap_exit_attribute, false },
345 { "renesas", 0, 0, false, true, false,
346 sh_handle_renesas_attribute, false },
347 { "trapa_handler", 0, 0, true, false, false,
348 sh_handle_interrupt_handler_attribute, false },
349 { "nosave_low_regs", 0, 0, true, false, false,
350 sh_handle_interrupt_handler_attribute, false },
351 { "resbank", 0, 0, true, false, false,
352 sh_handle_resbank_handler_attribute, false },
353 { "function_vector", 1, 1, true, false, false,
354 sh2a_handle_function_vector_handler_attribute, false },
355 { NULL, 0, 0, false, false, false, NULL, false }
358 /* Initialize the GCC target structure. */
359 #undef TARGET_ATTRIBUTE_TABLE
360 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
362 /* The next two are used for debug info when compiling with -gdwarf. */
363 #undef TARGET_ASM_UNALIGNED_HI_OP
364 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
365 #undef TARGET_ASM_UNALIGNED_SI_OP
366 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
368 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
369 #undef TARGET_ASM_UNALIGNED_DI_OP
370 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
371 #undef TARGET_ASM_ALIGNED_DI_OP
372 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
374 #undef TARGET_OPTION_OVERRIDE
375 #define TARGET_OPTION_OVERRIDE sh_option_override
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND sh_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
383 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
384 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
386 #undef TARGET_ASM_FUNCTION_EPILOGUE
387 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
389 #undef TARGET_ASM_OUTPUT_MI_THUNK
390 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
392 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
393 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
394 hook_bool_const_tree_hwi_hwi_const_tree_true
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START sh_file_start
398 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
399 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
401 #undef TARGET_REGISTER_MOVE_COST
402 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
404 #undef TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
407 #undef TARGET_SCHED_ADJUST_COST
408 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
410 #undef TARGET_SCHED_ISSUE_RATE
411 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
413 /* The next 5 hooks have been implemented for reenabling sched1. With the
414 help of these macros we are limiting the movement of insns in sched1 to
415 reduce the register pressure. The overall idea is to keep count of SImode
416 and SFmode regs required by already scheduled insns. When these counts
417 cross certain threshold values, priority is given to insns that free registers.
418 The insn that frees registers is most likely the one with the lowest
419 LUID (original insn order), but such an insn might be sitting in the stalled
420 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
421 up to a max of 8 cycles so that such insns may move from Q -> R.
423 The hooks are described below:
425 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
426 scheduler; it is called inside the sched_init function just after
427 find_insn_reg_weights function call. It is used to calculate the SImode
428 and SFmode weights of the insns in each basic block, similar to what
429 find_insn_reg_weights does.
430 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
432 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
433 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
434 (Q)->(R).
436 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
437 high, reorder the ready queue so that the insn with the lowest LUID will be
438 issued next.
440 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
441 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
443 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
444 can be returned from TARGET_SCHED_REORDER2.
446 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
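/* Illustrative sketch of the reordering idea described above; the real
   implementation is in rank_for_reorder, ready_reorder and sh_reorder
   further below.  When the SImode or SFmode pressure counters exceed some
   threshold (the value 4 here is made up for the example), the ready queue
   is sorted by descending LUID so that the insn with the lowest LUID ends
   up last, which is the slot the scheduler issues from first.  */
static void ATTRIBUTE_UNUSED
sh_sketch_reorder_ready (rtx *ready, int n_ready)
{
  if (curr_regmode_pressure[0] <= 4 && curr_regmode_pressure[1] <= 4)
    return;  /* Pressure is low; keep the scheduler's default order.  */

  /* Simple insertion sort by descending LUID.  */
  for (int i = 1; i < n_ready; i++)
    {
      rtx insn = ready[i];
      int j = i - 1;
      while (j >= 0 && INSN_LUID (ready[j]) < INSN_LUID (insn))
        {
          ready[j + 1] = ready[j];
          j--;
        }
      ready[j + 1] = insn;
    }
}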
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
451 #undef TARGET_SCHED_INIT_GLOBAL
452 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
454 #undef TARGET_SCHED_FINISH_GLOBAL
455 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
457 #undef TARGET_SCHED_VARIABLE_ISSUE
458 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
460 #undef TARGET_SCHED_REORDER
461 #define TARGET_SCHED_REORDER sh_reorder
463 #undef TARGET_SCHED_REORDER2
464 #define TARGET_SCHED_REORDER2 sh_reorder2
466 #undef TARGET_SCHED_INIT
467 #define TARGET_SCHED_INIT sh_md_init
469 #undef TARGET_DELEGITIMIZE_ADDRESS
470 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
472 #undef TARGET_LEGITIMIZE_ADDRESS
473 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
475 #undef TARGET_CANNOT_MODIFY_JUMPS_P
476 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
477 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
478 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
479 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
480 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
481 sh_optimize_target_register_callee_saved
483 #undef TARGET_MS_BITFIELD_LAYOUT_P
484 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
486 #undef TARGET_INIT_BUILTINS
487 #define TARGET_INIT_BUILTINS sh_init_builtins
488 #undef TARGET_BUILTIN_DECL
489 #define TARGET_BUILTIN_DECL sh_builtin_decl
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
493 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
494 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
496 #undef TARGET_CANNOT_COPY_INSN_P
497 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS sh_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST sh_address_cost
502 #undef TARGET_ALLOCATE_INITIAL_VALUE
503 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
505 #undef TARGET_MACHINE_DEPENDENT_REORG
506 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
508 #undef TARGET_DWARF_REGISTER_SPAN
509 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
511 #ifdef HAVE_AS_TLS
512 #undef TARGET_HAVE_TLS
513 #define TARGET_HAVE_TLS true
514 #endif
516 #undef TARGET_PROMOTE_PROTOTYPES
517 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
518 #undef TARGET_PROMOTE_FUNCTION_MODE
519 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
521 #undef TARGET_FUNCTION_VALUE
522 #define TARGET_FUNCTION_VALUE sh_function_value
523 #undef TARGET_FUNCTION_VALUE_REGNO_P
524 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
525 #undef TARGET_LIBCALL_VALUE
526 #define TARGET_LIBCALL_VALUE sh_libcall_value
527 #undef TARGET_STRUCT_VALUE_RTX
528 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
532 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
533 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
534 #undef TARGET_SETUP_INCOMING_VARARGS
535 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
536 #undef TARGET_STRICT_ARGUMENT_NAMING
537 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
538 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
539 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
540 #undef TARGET_MUST_PASS_IN_STACK
541 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
542 #undef TARGET_PASS_BY_REFERENCE
543 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
544 #undef TARGET_CALLEE_COPIES
545 #define TARGET_CALLEE_COPIES sh_callee_copies
546 #undef TARGET_ARG_PARTIAL_BYTES
547 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
548 #undef TARGET_FUNCTION_ARG
549 #define TARGET_FUNCTION_ARG sh_function_arg
550 #undef TARGET_FUNCTION_ARG_ADVANCE
551 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
553 #undef TARGET_BUILD_BUILTIN_VA_LIST
554 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
555 #undef TARGET_EXPAND_BUILTIN_VA_START
556 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
557 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
558 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
560 #undef TARGET_SCALAR_MODE_SUPPORTED_P
561 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
562 #undef TARGET_VECTOR_MODE_SUPPORTED_P
563 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
565 #undef TARGET_CHECK_PCH_TARGET_FLAGS
566 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
568 #undef TARGET_DWARF_CALLING_CONVENTION
569 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
571 #undef TARGET_FRAME_POINTER_REQUIRED
572 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
574 #undef TARGET_MODE_EMIT
575 #define TARGET_MODE_EMIT sh_emit_mode_set
577 #undef TARGET_MODE_NEEDED
578 #define TARGET_MODE_NEEDED sh_mode_needed
580 #undef TARGET_MODE_AFTER
581 #define TARGET_MODE_AFTER sh_mode_after
583 #undef TARGET_MODE_ENTRY
584 #define TARGET_MODE_ENTRY sh_mode_entry
586 #undef TARGET_MODE_EXIT
587 #define TARGET_MODE_EXIT sh_mode_exit
589 #undef TARGET_MODE_PRIORITY
590 #define TARGET_MODE_PRIORITY sh_mode_priority
592 /* Return regmode weight for insn. */
593 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
594 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
596 /* Return current register pressure for regmode. */
597 #define CURR_REGMODE_PRESSURE(MODE)\
598 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
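/* Illustrative only: how the two macros above are meant to interact.  When
   an insn is issued, its precomputed SImode / SFmode weights are added to
   the running pressure counters; the scheduler hooks below do the real
   bookkeeping, and only while the regmode_weight arrays are allocated.  */
static void ATTRIBUTE_UNUSED
sh_sketch_note_issued_insn (rtx insn)
{
  CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
  CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
}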
600 #undef TARGET_ENCODE_SECTION_INFO
601 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
603 #undef TARGET_SECONDARY_RELOAD
604 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
606 #undef TARGET_PREFERRED_RELOAD_CLASS
607 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
609 #undef TARGET_CONDITIONAL_REGISTER_USAGE
610 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
612 #undef TARGET_LEGITIMATE_ADDRESS_P
613 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
615 #undef TARGET_TRAMPOLINE_INIT
616 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
617 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
618 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
620 #undef TARGET_LEGITIMATE_CONSTANT_P
621 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
623 #undef TARGET_CANONICALIZE_COMPARISON
624 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
626 #undef TARGET_FIXED_CONDITION_CODE_REGS
627 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
629 /* Machine-specific symbol_ref flags. */
630 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
632 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
633 is used by optabs.c atomic op expansion code as well as in sync.md. */
634 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
635 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
637 struct gcc_target targetm = TARGET_INITIALIZER;
640 /* Information on the currently selected atomic model.
641 This is initialized in sh_option_override. */
642 static sh_atomic_model selected_atomic_model_;
644 const sh_atomic_model&
645 selected_atomic_model (void)
647 return selected_atomic_model_;
650 static sh_atomic_model
651 parse_validate_atomic_model_option (const char* str)
653 const char* model_names[sh_atomic_model::num_models];
654 model_names[sh_atomic_model::none] = "none";
655 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
656 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
657 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
658 model_names[sh_atomic_model::soft_imask] = "soft-imask";
660 const char* model_cdef_names[sh_atomic_model::num_models];
661 model_cdef_names[sh_atomic_model::none] = "NONE";
662 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
663 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
664 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
665 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
667 sh_atomic_model ret;
668 ret.type = sh_atomic_model::none;
669 ret.name = model_names[sh_atomic_model::none];
670 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
671 ret.strict = false;
672 ret.tcb_gbr_offset = -1;
674 /* Handle empty string as 'none'. */
675 if (str == NULL || *str == '\0')
676 return ret;
678 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
680 std::vector<std::string> tokens;
681 for (std::stringstream ss (str); ss.good (); )
683 tokens.push_back (std::string ());
684 std::getline (ss, tokens.back (), ',');
687 if (tokens.empty ())
688 err_ret ("invalid atomic model option");
690 /* The first token must be the atomic model name. */
692 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
693 if (tokens.front () == model_names[i])
695 ret.type = (sh_atomic_model::enum_type)i;
696 ret.name = model_names[i];
697 ret.cdef_name = model_cdef_names[i];
698 goto got_mode_name;
701 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
702 got_mode_name:;
705 /* Go through the remaining tokens. */
706 for (size_t i = 1; i < tokens.size (); ++i)
708 if (tokens[i] == "strict")
709 ret.strict = true;
710 else if (tokens[i].find ("gbr-offset=") == 0)
712 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
713 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
714 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
715 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
716 "option", offset_str.c_str ());
718 else
719 err_ret ("unknown parameter \"%s\" in atomic model option",
720 tokens[i].c_str ());
723 /* Check that the selection makes sense. */
724 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
725 err_ret ("atomic operations are not supported on SHmedia");
727 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
728 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
729 ret.name);
731 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
732 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
734 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
735 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
737 if (ret.type == sh_atomic_model::soft_tcb
738 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
739 || (ret.tcb_gbr_offset & 3) != 0))
740 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
741 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
742 ret.name);
744 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
745 err_ret ("cannot use atomic model %s in user mode", ret.name);
747 return ret;
749 #undef err_ret
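/* Illustrative only: what a typical -matomic-model string is expected to
   parse into, assuming a target on which the selected model is valid.  */
static void ATTRIBUTE_UNUSED
sh_sketch_check_atomic_model_parse (void)
{
  sh_atomic_model m
    = parse_validate_atomic_model_option ("soft-tcb,gbr-offset=128,strict");
  gcc_assert (m.type == sh_atomic_model::soft_tcb);
  gcc_assert (m.tcb_gbr_offset == 128);
  gcc_assert (m.strict);
}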
752 /* Register SH specific RTL passes. */
753 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
754 const char* name);
755 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
756 const char* name);
757 static void
758 register_sh_passes (void)
760 if (!TARGET_SH1)
761 return;
763 /* Running the sh_treg_combine pass after ce1 generates better code when
764 comparisons are combined and reg-reg moves are introduced, because
765 reg-reg moves will be eliminated afterwards. However, there are quite
766 a few cases where combine will be unable to fold comparison-related insns,
767 thus for now don't do it.
768 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
769 PASS_POS_INSERT_AFTER, "ce1", 1);
770 */
772 /* Run sh_treg_combine pass after combine but before register allocation. */
773 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
774 PASS_POS_INSERT_AFTER, "split1", 1);
776 /* Run sh_treg_combine pass after register allocation and basic block
777 reordering as this sometimes creates new opportunities. */
778 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
779 PASS_POS_INSERT_AFTER, "split4", 1);
781 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
782 is known after a conditional branch.
783 This must be done after basic blocks and branch conditions have
784 stabilized and won't be changed by further passes. */
785 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
786 PASS_POS_INSERT_BEFORE, "sched2", 1);
789 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
790 various options, and do some machine dependent initialization. */
791 static void
792 sh_option_override (void)
794 int regno;
796 SUBTARGET_OVERRIDE_OPTIONS;
797 if (optimize > 1 && !optimize_size)
798 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
800 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
801 TARGET_CBRANCHDI4 = 1;
802 TARGET_CMPEQDI_T = 0;
804 sh_cpu = PROCESSOR_SH1;
805 assembler_dialect = 0;
806 if (TARGET_SH2)
807 sh_cpu = PROCESSOR_SH2;
808 if (TARGET_SH2E)
809 sh_cpu = PROCESSOR_SH2E;
810 if (TARGET_SH2A)
811 sh_cpu = PROCESSOR_SH2A;
812 if (TARGET_SH3)
813 sh_cpu = PROCESSOR_SH3;
814 if (TARGET_SH3E)
815 sh_cpu = PROCESSOR_SH3E;
816 if (TARGET_SH4)
818 assembler_dialect = 1;
819 sh_cpu = PROCESSOR_SH4;
821 if (TARGET_SH4A_ARCH)
823 assembler_dialect = 1;
824 sh_cpu = PROCESSOR_SH4A;
826 if (TARGET_SH5)
828 sh_cpu = PROCESSOR_SH5;
829 target_flags |= MASK_ALIGN_DOUBLE;
830 if (TARGET_SHMEDIA_FPU)
831 target_flags |= MASK_FMOVD;
832 if (TARGET_SHMEDIA)
834 /* There are no delay slots on SHmedia. */
835 flag_delayed_branch = 0;
836 /* Relaxation isn't yet supported for SHmedia. */
837 target_flags &= ~MASK_RELAX;
838 /* After reload, if-conversion does little good but can cause
839 ICEs:
840 - find_if_block doesn't do anything for SH because we don't
841 have conditional execution patterns. (We use conditional
842 move patterns, which are handled differently, and only
843 before reload).
844 - find_cond_trap doesn't do anything for the SH because we
845 don't have conditional traps.
846 - find_if_case_1 uses redirect_edge_and_branch_force in
847 the only path that does an optimization, and this causes
848 an ICE when branch targets are in registers.
849 - find_if_case_2 doesn't do anything for the SHmedia after
850 reload except when it can redirect a tablejump - and
851 that's rather rare. */
852 flag_if_conversion2 = 0;
853 if (! strcmp (sh_div_str, "call"))
854 sh_div_strategy = SH_DIV_CALL;
855 else if (! strcmp (sh_div_str, "call2"))
856 sh_div_strategy = SH_DIV_CALL2;
857 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
858 sh_div_strategy = SH_DIV_FP;
859 else if (! strcmp (sh_div_str, "inv"))
860 sh_div_strategy = SH_DIV_INV;
861 else if (! strcmp (sh_div_str, "inv:minlat"))
862 sh_div_strategy = SH_DIV_INV_MINLAT;
863 else if (! strcmp (sh_div_str, "inv20u"))
864 sh_div_strategy = SH_DIV_INV20U;
865 else if (! strcmp (sh_div_str, "inv20l"))
866 sh_div_strategy = SH_DIV_INV20L;
867 else if (! strcmp (sh_div_str, "inv:call2"))
868 sh_div_strategy = SH_DIV_INV_CALL2;
869 else if (! strcmp (sh_div_str, "inv:call"))
870 sh_div_strategy = SH_DIV_INV_CALL;
871 else if (! strcmp (sh_div_str, "inv:fp"))
873 if (TARGET_FPU_ANY)
874 sh_div_strategy = SH_DIV_INV_FP;
875 else
876 sh_div_strategy = SH_DIV_INV;
878 TARGET_CBRANCHDI4 = 0;
879 /* Assembler CFI isn't yet fully supported for SHmedia. */
880 flag_dwarf2_cfi_asm = 0;
883 else
885 /* Only the sh64-elf assembler fully supports .quad properly. */
886 targetm.asm_out.aligned_op.di = NULL;
887 targetm.asm_out.unaligned_op.di = NULL;
889 if (TARGET_SH1)
891 if (! strcmp (sh_div_str, "call-div1"))
892 sh_div_strategy = SH_DIV_CALL_DIV1;
893 else if (! strcmp (sh_div_str, "call-fp")
894 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
895 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
896 sh_div_strategy = SH_DIV_CALL_FP;
897 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
898 sh_div_strategy = SH_DIV_CALL_TABLE;
899 else
900 /* Pick one that makes most sense for the target in general.
901 It is not much good to use different functions depending
902 on -Os, since then we'll end up with two different functions
903 when some of the code is compiled for size, and some for
904 speed. */
906 /* SH4 tends to emphasize speed. */
907 if (TARGET_HARD_SH4)
908 sh_div_strategy = SH_DIV_CALL_TABLE;
909 /* These have their own way of doing things. */
910 else if (TARGET_SH2A)
911 sh_div_strategy = SH_DIV_INTRINSIC;
912 /* ??? Should we use the integer SHmedia function instead? */
913 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
914 sh_div_strategy = SH_DIV_CALL_FP;
915 /* SH1 .. SH3 cores often go into small-footprint systems, so
916 default to the smallest implementation available. */
917 else
918 sh_div_strategy = SH_DIV_CALL_DIV1;
920 if (!TARGET_SH1)
921 TARGET_PRETEND_CMOVE = 0;
922 if (sh_divsi3_libfunc[0])
923 ; /* User supplied - leave it alone. */
924 else if (TARGET_DIVIDE_CALL_FP)
925 sh_divsi3_libfunc = "__sdivsi3_i4";
926 else if (TARGET_DIVIDE_CALL_TABLE)
927 sh_divsi3_libfunc = "__sdivsi3_i4i";
928 else if (TARGET_SH5)
929 sh_divsi3_libfunc = "__sdivsi3_1";
930 else
931 sh_divsi3_libfunc = "__sdivsi3";
933 if (sh_branch_cost == -1)
935 /* The SH1 does not have delay slots, hence we get a pipeline stall
936 at every branch. The SH4 is superscalar, so the single delay slot
937 is not sufficient to keep both pipelines filled.
938 In any case, set the default branch cost to '2', as it results in
939 slightly smaller code overall and also enables some if-conversions
940 that are required for matching special T bit related insns. */
941 sh_branch_cost = 2;
944 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
945 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
946 TARGET_ZDCBRANCH = 1;
948 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
949 if (! VALID_REGISTER_P (regno))
950 sh_register_names[regno][0] = '\0';
952 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
953 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
954 sh_additional_register_names[regno][0] = '\0';
956 if ((flag_pic && ! TARGET_PREFERGOT)
957 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
958 flag_no_function_cse = 1;
960 if (targetm.small_register_classes_for_mode_p (VOIDmode))
962 /* Never run scheduling before reload, since that can
963 break global alloc, and generates slower code anyway due
964 to the pressure on R0. */
965 /* Enable sched1 for SH4 if the user explicitly requests it.
966 When sched1 is enabled, the ready queue will be reordered by
967 the target hooks if pressure is high. We cannot do this for
968 PIC, SH3 and lower as they give spill failures for R0. */
969 if (!TARGET_HARD_SH4 || flag_pic)
970 flag_schedule_insns = 0;
971 /* ??? Current exception handling places basic block boundaries
972 after call_insns. This causes high pressure on R0 and gives
973 spill failures for R0 in reload. See PR 22553 and the thread
974 on gcc-patches
975 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
976 else if (flag_exceptions)
978 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
979 warning (0, "ignoring -fschedule-insns because of exception "
980 "handling bug");
981 flag_schedule_insns = 0;
983 else if (flag_schedule_insns
984 && !global_options_set.x_flag_schedule_insns)
985 flag_schedule_insns = 0;
988 /* Unwind info is not correct around the CFG unless either a frame
989 pointer is present or M_A_O_A is set. Fixing this requires rewriting
990 unwind info generation to be aware of the CFG and propagating states
991 around edges. */
992 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
993 || flag_exceptions || flag_non_call_exceptions)
994 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
996 warning (0, "unwind tables currently require either a frame pointer "
997 "or -maccumulate-outgoing-args for correctness");
998 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1001 /* Unwinding with -freorder-blocks-and-partition does not work on this
1002 architecture, because it requires far jumps to labels crossing between
1003 hot/cold sections, which are rejected on this architecture.
1004 if (flag_reorder_blocks_and_partition)
1006 if (flag_exceptions)
1008 inform (input_location,
1009 "-freorder-blocks-and-partition does not work with "
1010 "exceptions on this architecture");
1011 flag_reorder_blocks_and_partition = 0;
1012 flag_reorder_blocks = 1;
1014 else if (flag_unwind_tables)
1016 inform (input_location,
1017 "-freorder-blocks-and-partition does not support unwind "
1018 "info on this architecture");
1019 flag_reorder_blocks_and_partition = 0;
1020 flag_reorder_blocks = 1;
1024 /* Adjust loop, jump and function alignment values (in bytes), if those
1025 were not specified by the user using -falign-loops, -falign-jumps
1026 and -falign-functions options.
1027 32 bit alignment is better for speed, because instructions can be
1028 fetched as a pair from a longword boundary. For size use 16 bit
1029 alignment to get more compact code.
1030 Aligning all jumps increases the code size, even if it might
1031 result in slightly faster code. Thus, it is set to the smallest
1032 alignment possible if not specified by the user. */
1033 if (align_loops == 0)
1035 if (TARGET_SH5)
1036 align_loops = 8;
1037 else
1038 align_loops = optimize_size ? 2 : 4;
1041 if (align_jumps == 0)
1043 if (TARGET_SHMEDIA)
1044 align_jumps = 1 << CACHE_LOG;
1045 else
1046 align_jumps = 2;
1048 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1049 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1051 if (align_functions == 0)
1053 if (TARGET_SHMEDIA)
1054 align_functions = optimize_size
1055 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1056 else
1057 align_functions = optimize_size ? 2 : 4;
1060 /* The linker relaxation code breaks when a function contains
1061 alignments that are larger than that at the start of a
1062 compilation unit. */
1063 if (TARGET_RELAX)
1065 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1067 /* Also take possible .long constants / mova tables into account. */
1068 if (min_align < 4)
1069 min_align = 4;
1070 if (align_functions < min_align)
1071 align_functions = min_align;
1074 if (flag_unsafe_math_optimizations)
1076 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1077 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1078 TARGET_FSCA = 1;
1080 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1081 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1082 TARGET_FSRRA = 1;
1085 /* Allow fsrra insn only if -funsafe-math-optimizations and
1086 -ffinite-math-only are enabled. */
1087 TARGET_FSRRA = TARGET_FSRRA
1088 && flag_unsafe_math_optimizations
1089 && flag_finite_math_only;
1091 /* If the -mieee option was not explicitly set by the user, turn it on
1092 unless -ffinite-math-only was specified. See also PR 33135. */
1093 if (! global_options_set.x_TARGET_IEEE)
1094 TARGET_IEEE = ! flag_finite_math_only;
1096 if (sh_fixed_range_str)
1097 sh_fix_range (sh_fixed_range_str);
1099 /* This target defaults to strict volatile bitfields. */
1100 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1101 flag_strict_volatile_bitfields = 1;
1103 /* Parse atomic model option and make sure it is valid for the current
1104 target CPU. */
1105 selected_atomic_model_
1106 = parse_validate_atomic_model_option (sh_atomic_model_str);
1108 register_sh_passes ();
1111 /* Print the operand address in x to the stream. */
1112 static void
1113 sh_print_operand_address (FILE *stream, rtx x)
1115 switch (GET_CODE (x))
1117 case REG:
1118 case SUBREG:
1119 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1120 break;
1122 case PLUS:
1124 rtx base = XEXP (x, 0);
1125 rtx index = XEXP (x, 1);
1127 switch (GET_CODE (index))
1129 case CONST_INT:
1130 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1131 reg_names[true_regnum (base)]);
1132 break;
1134 case REG:
1135 case SUBREG:
1137 int base_num = true_regnum (base);
1138 int index_num = true_regnum (index);
1140 fprintf (stream, "@(r0,%s)",
1141 reg_names[MAX (base_num, index_num)]);
1142 break;
1145 default:
1146 gcc_unreachable ();
1149 break;
1151 case PRE_DEC:
1152 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1153 break;
1155 case POST_INC:
1156 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1157 break;
1159 default:
1160 x = mark_constant_pool_use (x);
1161 output_addr_const (stream, x);
1162 break;
1166 /* Print operand x (an rtx) in assembler syntax to file stream
1167 according to modifier code.
1169 '.' print a .s if insn needs delay slot
1170 ',' print LOCAL_LABEL_PREFIX
1171 '@' print trap, rte or rts depending upon the interrupt attributes
1172 '#' output a nop if there is nothing to put in the delay slot
1173 ''' print likelihood suffix (/u for unlikely).
1174 '>' print branch target if -fverbose-asm
1175 'O' print a constant without the #
1176 'R' print the LSW of a dp value - changes if in little endian
1177 'S' print the MSW of a dp value - changes if in little endian
1178 'T' print the next word of a dp value - same as 'R' in big endian mode.
1179 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1180 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1181 'N' print 'r63' if the operand is (const_int 0).
1182 'd' print a V2SF reg as dN instead of fpN.
1183 'm' print a pair `base,offset' or `base,index', for LD and ST.
1184 'U' Likewise for {LD,ST}{HI,LO}.
1185 'V' print the position of a single bit set.
1186 'W' print the position of a single bit cleared.
1187 't' print a memory address which is a register.
1188 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1189 'o' output an operator. */
1190 static void
1191 sh_print_operand (FILE *stream, rtx x, int code)
1193 int regno;
1194 enum machine_mode mode;
1196 switch (code)
1198 tree trapa_attr;
1200 case '.':
1201 if (final_sequence
1202 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1203 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1204 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1205 break;
1206 case ',':
1207 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1208 break;
1209 case '@':
1210 trapa_attr = lookup_attribute ("trap_exit",
1211 DECL_ATTRIBUTES (current_function_decl));
1212 if (trapa_attr)
1213 fprintf (stream, "trapa #%ld",
1214 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1215 else if (sh_cfun_interrupt_handler_p ())
1217 if (sh_cfun_resbank_handler_p ())
1218 fprintf (stream, "resbank\n");
1219 fprintf (stream, "rte");
1221 else
1222 fprintf (stream, "rts");
1223 break;
1224 case '#':
1225 /* Output a nop if there's nothing in the delay slot. */
1226 if (dbr_sequence_length () == 0)
1227 fprintf (stream, "\n\tnop");
1228 break;
1229 case '\'':
1231 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1233 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1234 fputs ("/u", stream);
1235 break;
1237 case '>':
1238 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1240 fputs ("\t! target: ", stream);
1241 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1243 break;
1244 case 'O':
1245 x = mark_constant_pool_use (x);
1246 output_addr_const (stream, x);
1247 break;
1248 /* N.B.: %R / %S / %T adjust memory addresses by four.
1249 For SHMEDIA, that means they can be used to access the first and
1250 second 32 bit part of a 64 bit (or larger) value that
1251 might be held in floating point registers or memory.
1252 While they can be used to access 64 bit parts of a larger value
1253 held in general purpose registers, that won't work with memory,
1254 nor with fp registers, since the frxx names are used. */
1255 case 'R':
1256 if (REG_P (x) || GET_CODE (x) == SUBREG)
1258 regno = true_regnum (x);
1259 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1260 fputs (reg_names[regno], (stream));
1262 else if (MEM_P (x))
1264 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1265 sh_print_operand_address (stream, XEXP (x, 0));
1267 else
1269 rtx sub = NULL_RTX;
1271 mode = GET_MODE (x);
1272 if (mode == VOIDmode)
1273 mode = DImode;
1274 if (GET_MODE_SIZE (mode) >= 8)
1275 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1276 if (sub)
1277 sh_print_operand (stream, sub, 0);
1278 else
1279 output_operand_lossage ("invalid operand to %%R");
1281 break;
1282 case 'S':
1283 if (REG_P (x) || GET_CODE (x) == SUBREG)
1285 regno = true_regnum (x);
1286 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1287 fputs (reg_names[regno], (stream));
1289 else if (MEM_P (x))
1291 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1292 sh_print_operand_address (stream, XEXP (x, 0));
1294 else
1296 rtx sub = NULL_RTX;
1298 mode = GET_MODE (x);
1299 if (mode == VOIDmode)
1300 mode = DImode;
1301 if (GET_MODE_SIZE (mode) >= 8)
1302 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1303 if (sub)
1304 sh_print_operand (stream, sub, 0);
1305 else
1306 output_operand_lossage ("invalid operand to %%S");
1308 break;
1309 case 'T':
1310 /* Next word of a double. */
1311 switch (GET_CODE (x))
1313 case REG:
1314 fputs (reg_names[REGNO (x) + 1], (stream));
1315 break;
1316 case MEM:
1317 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1318 && GET_CODE (XEXP (x, 0)) != POST_INC)
1319 x = adjust_address (x, SImode, 4);
1320 sh_print_operand_address (stream, XEXP (x, 0));
1321 break;
1322 default:
1323 break;
1325 break;
1327 case 't':
1328 gcc_assert (MEM_P (x));
1329 x = XEXP (x, 0);
1330 switch (GET_CODE (x))
1332 case REG:
1333 case SUBREG:
1334 sh_print_operand (stream, x, 0);
1335 break;
1336 default:
1337 break;
1339 break;
1341 case 'o':
1342 switch (GET_CODE (x))
1344 case PLUS: fputs ("add", stream); break;
1345 case MINUS: fputs ("sub", stream); break;
1346 case MULT: fputs ("mul", stream); break;
1347 case DIV: fputs ("div", stream); break;
1348 case EQ: fputs ("eq", stream); break;
1349 case NE: fputs ("ne", stream); break;
1350 case GT: case LT: fputs ("gt", stream); break;
1351 case GE: case LE: fputs ("ge", stream); break;
1352 case GTU: case LTU: fputs ("gtu", stream); break;
1353 case GEU: case LEU: fputs ("geu", stream); break;
1354 default:
1355 break;
1357 break;
1358 case 'M':
1359 if (TARGET_SHMEDIA)
1361 if (MEM_P (x)
1362 && GET_CODE (XEXP (x, 0)) == PLUS
1363 && (REG_P (XEXP (XEXP (x, 0), 1))
1364 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1365 fputc ('x', stream);
1367 else
1369 if (MEM_P (x))
1371 switch (GET_MODE (x))
1373 case QImode: fputs (".b", stream); break;
1374 case HImode: fputs (".w", stream); break;
1375 case SImode: fputs (".l", stream); break;
1376 case SFmode: fputs (".s", stream); break;
1377 case DFmode: fputs (".d", stream); break;
1378 default: gcc_unreachable ();
1382 break;
1384 case 'm':
1385 gcc_assert (MEM_P (x));
1386 x = XEXP (x, 0);
1387 /* Fall through. */
1388 case 'U':
1389 switch (GET_CODE (x))
1391 case REG:
1392 case SUBREG:
1393 sh_print_operand (stream, x, 0);
1394 fputs (", 0", stream);
1395 break;
1397 case PLUS:
1398 sh_print_operand (stream, XEXP (x, 0), 0);
1399 fputs (", ", stream);
1400 sh_print_operand (stream, XEXP (x, 1), 0);
1401 break;
1403 default:
1404 gcc_unreachable ();
1406 break;
1408 case 'V':
1410 int num = exact_log2 (INTVAL (x));
1411 gcc_assert (num >= 0);
1412 fprintf (stream, "#%d", num);
1414 break;
1416 case 'W':
1418 int num = exact_log2 (~INTVAL (x));
1419 gcc_assert (num >= 0);
1420 fprintf (stream, "#%d", num);
1422 break;
1424 case 'd':
1425 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1427 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1428 break;
1430 case 'N':
1431 if (x == CONST0_RTX (GET_MODE (x)))
1433 fprintf ((stream), "r63");
1434 break;
1436 goto default_output;
1437 case 'u':
1438 if (CONST_INT_P (x))
1440 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1441 break;
1443 /* Fall through. */
1445 default_output:
1446 default:
1447 regno = 0;
1448 mode = GET_MODE (x);
1450 switch (GET_CODE (x))
1452 case TRUNCATE:
1454 rtx inner = XEXP (x, 0);
1455 int offset = 0;
1456 enum machine_mode inner_mode;
1458 /* We might see SUBREGs with vector mode registers inside. */
1459 if (GET_CODE (inner) == SUBREG
1460 && (GET_MODE_SIZE (GET_MODE (inner))
1461 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1462 && subreg_lowpart_p (inner))
1463 inner = SUBREG_REG (inner);
1464 if (CONST_INT_P (inner))
1466 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1467 goto default_output;
1469 inner_mode = GET_MODE (inner);
1470 if (GET_CODE (inner) == SUBREG
1471 && (GET_MODE_SIZE (GET_MODE (inner))
1472 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1473 && REG_P (SUBREG_REG (inner)))
1475 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1476 GET_MODE (SUBREG_REG (inner)),
1477 SUBREG_BYTE (inner),
1478 GET_MODE (inner));
1479 inner = SUBREG_REG (inner);
1481 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1482 abort ();
1483 /* Floating point register pairs are always big endian;
1484 general purpose registers are 64 bit wide. */
1485 regno = REGNO (inner);
1486 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1487 - HARD_REGNO_NREGS (regno, mode))
1488 + offset;
1489 x = inner;
1490 goto reg;
1492 case SIGN_EXTEND:
1493 x = XEXP (x, 0);
1494 goto reg;
1495 /* FIXME: We need this on SHmedia32 because reload generates
1496 some sign-extended HI or QI loads into DImode registers
1497 but, because Pmode is SImode, the address ends up with a
1498 subreg:SI of the DImode register. Maybe reload should be
1499 fixed so as to apply alter_subreg to such loads? */
1500 case IF_THEN_ELSE:
1501 gcc_assert (trapping_target_operand (x, VOIDmode));
1502 x = XEXP (XEXP (x, 2), 0);
1503 goto default_output;
1504 case SUBREG:
1505 gcc_assert (SUBREG_BYTE (x) == 0
1506 && REG_P (SUBREG_REG (x)));
1508 x = SUBREG_REG (x);
1509 /* Fall through. */
1511 reg:
1512 case REG:
1513 regno += REGNO (x);
1514 if (FP_REGISTER_P (regno)
1515 && mode == V16SFmode)
1516 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1517 else if (FP_REGISTER_P (REGNO (x))
1518 && mode == V4SFmode)
1519 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1520 else if (REG_P (x)
1521 && mode == V2SFmode)
1522 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1523 else if (FP_REGISTER_P (REGNO (x))
1524 && GET_MODE_SIZE (mode) > 4)
1525 fprintf ((stream), "d%s", reg_names[regno] + 1);
1526 else
1527 fputs (reg_names[regno], (stream));
1528 break;
1530 case MEM:
1531 output_address (XEXP (x, 0));
1532 break;
1534 default:
1535 if (TARGET_SH1)
1536 fputc ('#', stream);
1537 output_addr_const (stream, x);
1538 break;
1540 break;
1544 static bool
1545 sh_print_operand_punct_valid_p (unsigned char code)
1547 return (code == '.' || code == '#' || code == '@' || code == ','
1548 || code == '$' || code == '\'' || code == '>');
1551 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1552 static bool
1553 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1555 if (GET_CODE (x) == UNSPEC)
1557 switch (XINT (x, 1))
1559 case UNSPEC_DATALABEL:
1560 fputs ("datalabel ", file);
1561 output_addr_const (file, XVECEXP (x, 0, 0));
1562 break;
1563 case UNSPEC_PIC:
1564 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1565 output_addr_const (file, XVECEXP (x, 0, 0));
1566 break;
1567 case UNSPEC_GOT:
1568 output_addr_const (file, XVECEXP (x, 0, 0));
1569 fputs ("@GOT", file);
1570 break;
1571 case UNSPEC_GOTOFF:
1572 output_addr_const (file, XVECEXP (x, 0, 0));
1573 fputs ("@GOTOFF", file);
1574 break;
1575 case UNSPEC_PLT:
1576 output_addr_const (file, XVECEXP (x, 0, 0));
1577 fputs ("@PLT", file);
1578 break;
1579 case UNSPEC_GOTPLT:
1580 output_addr_const (file, XVECEXP (x, 0, 0));
1581 fputs ("@GOTPLT", file);
1582 break;
1583 case UNSPEC_DTPOFF:
1584 output_addr_const (file, XVECEXP (x, 0, 0));
1585 fputs ("@DTPOFF", file);
1586 break;
1587 case UNSPEC_GOTTPOFF:
1588 output_addr_const (file, XVECEXP (x, 0, 0));
1589 fputs ("@GOTTPOFF", file);
1590 break;
1591 case UNSPEC_TPOFF:
1592 output_addr_const (file, XVECEXP (x, 0, 0));
1593 fputs ("@TPOFF", file);
1594 break;
1595 case UNSPEC_CALLER:
1597 char name[32];
1598 /* LPCS stands for Label for PIC Call Site. */
1599 targetm.asm_out.generate_internal_label (name, "LPCS",
1600 INTVAL (XVECEXP (x, 0, 0)));
1601 assemble_name (file, name);
1603 break;
1604 case UNSPEC_EXTRACT_S16:
1605 case UNSPEC_EXTRACT_U16:
1607 rtx val, shift;
1609 val = XVECEXP (x, 0, 0);
1610 shift = XVECEXP (x, 0, 1);
1611 fputc ('(', file);
1612 if (shift != const0_rtx)
1613 fputc ('(', file);
1614 if (GET_CODE (val) == CONST
1615 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1617 fputc ('(', file);
1618 output_addr_const (file, val);
1619 fputc (')', file);
1621 else
1622 output_addr_const (file, val);
1623 if (shift != const0_rtx)
1625 fputs (" >> ", file);
1626 output_addr_const (file, shift);
1627 fputc (')', file);
1629 fputs (" & 65535)", file);
1631 break;
1632 case UNSPEC_SYMOFF:
1633 output_addr_const (file, XVECEXP (x, 0, 0));
1634 fputc ('-', file);
1635 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1637 fputc ('(', file);
1638 output_addr_const (file, XVECEXP (x, 0, 1));
1639 fputc (')', file);
1641 else
1642 output_addr_const (file, XVECEXP (x, 0, 1));
1643 break;
1644 case UNSPEC_PCREL_SYMOFF:
1645 output_addr_const (file, XVECEXP (x, 0, 0));
1646 fputs ("-(", file);
1647 output_addr_const (file, XVECEXP (x, 0, 1));
1648 fputs ("-.)", file);
1649 break;
1650 default:
1651 return false;
1653 return true;
1655 else
1656 return false;
1659 /* Encode symbol attributes of a SYMBOL_REF into its
1660 SYMBOL_REF_FLAGS. */
1661 static void
1662 sh_encode_section_info (tree decl, rtx rtl, int first)
1664 default_encode_section_info (decl, rtl, first);
1666 if (TREE_CODE (decl) == FUNCTION_DECL
1667 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1668 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1671 /* Prepare operands for a move define_expand; specifically, one of the
1672 operands must be in a register. */
1673 void
1674 prepare_move_operands (rtx operands[], enum machine_mode mode)
1676 if ((mode == SImode || mode == DImode)
1677 && flag_pic
1678 && ! ((mode == Pmode || mode == ptr_mode)
1679 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1681 rtx temp;
1682 if (SYMBOLIC_CONST_P (operands[1]))
1684 if (MEM_P (operands[0]))
1685 operands[1] = force_reg (Pmode, operands[1]);
1686 else if (TARGET_SHMEDIA
1687 && GET_CODE (operands[1]) == LABEL_REF
1688 && target_reg_operand (operands[0], mode))
1689 /* It's ok. */;
1690 else
1692 temp = (!can_create_pseudo_p ()
1693 ? operands[0]
1694 : gen_reg_rtx (Pmode));
1695 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1698 else if (GET_CODE (operands[1]) == CONST
1699 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1700 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1702 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1703 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1704 mode, temp);
1705 operands[1] = expand_binop (mode, add_optab, temp,
1706 XEXP (XEXP (operands[1], 0), 1),
1707 (!can_create_pseudo_p ()
1708 ? temp
1709 : gen_reg_rtx (Pmode)),
1710 0, OPTAB_LIB_WIDEN);
1714 if (! reload_in_progress && ! reload_completed)
1716 /* Copy the source to a register if neither operand is a register. */
1717 if (! register_operand (operands[0], mode)
1718 && ! sh_register_operand (operands[1], mode))
1719 operands[1] = copy_to_mode_reg (mode, operands[1]);
1721 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1723 /* This is like change_address_1 (operands[0], mode, 0, 1),
1724 except that we can't use that function because it is static. */
1725 rtx new_rtx = change_address (operands[0], mode, 0);
1726 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1727 operands[0] = new_rtx;
1730 /* This case can happen while generating code to move the result
1731 of a library call to the target. Reject `st r0,@(rX,rY)' because
1732 reload will fail to find a spill register for rX, since r0 is already
1733 being used for the source. */
1734 else if (TARGET_SH1
1735 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1736 && MEM_P (operands[0])
1737 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1738 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1739 operands[1] = copy_to_mode_reg (mode, operands[1]);
1742 if (mode == Pmode || mode == ptr_mode)
1744 rtx op0, op1, opc;
1745 enum tls_model tls_kind;
1747 op0 = operands[0];
1748 op1 = operands[1];
1749 if (GET_CODE (op1) == CONST
1750 && GET_CODE (XEXP (op1, 0)) == PLUS
1751 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1752 != TLS_MODEL_NONE))
1754 opc = XEXP (XEXP (op1, 0), 1);
1755 op1 = XEXP (XEXP (op1, 0), 0);
1757 else
1758 opc = NULL_RTX;
1760 if (! reload_in_progress && ! reload_completed
1761 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1763 rtx tga_op1, tga_ret, tmp, tmp2;
1765 if (! flag_pic
1766 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1767 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1768 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1770 /* Don't schedule insns for getting GOT address when
1771 the first scheduling pass is enabled, to avoid spill
1772 failures for R0. */
1773 if (flag_schedule_insns)
1774 emit_insn (gen_blockage ());
1775 emit_insn (gen_GOTaddr2picreg ());
1776 emit_use (gen_rtx_REG (SImode, PIC_REG));
1777 if (flag_schedule_insns)
1778 emit_insn (gen_blockage ());
1781 switch (tls_kind)
1783 case TLS_MODEL_GLOBAL_DYNAMIC:
1784 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1785 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1786 tmp = gen_reg_rtx (Pmode);
1787 emit_move_insn (tmp, tga_ret);
1788 op1 = tmp;
1789 break;
1791 case TLS_MODEL_LOCAL_DYNAMIC:
1792 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1793 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1795 tmp = gen_reg_rtx (Pmode);
1796 emit_move_insn (tmp, tga_ret);
1798 if (register_operand (op0, Pmode))
1799 tmp2 = op0;
1800 else
1801 tmp2 = gen_reg_rtx (Pmode);
1803 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1804 op1 = tmp2;
1805 break;
1807 case TLS_MODEL_INITIAL_EXEC:
1808 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1809 tmp = gen_sym2GOTTPOFF (op1);
1810 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1811 op1 = tga_op1;
1812 break;
1814 case TLS_MODEL_LOCAL_EXEC:
1815 tmp2 = gen_reg_rtx (Pmode);
1816 emit_insn (gen_store_gbr (tmp2));
1817 tmp = gen_reg_rtx (Pmode);
1818 emit_insn (gen_symTPOFF2reg (tmp, op1));
1820 if (register_operand (op0, Pmode))
1821 op1 = op0;
1822 else
1823 op1 = gen_reg_rtx (Pmode);
1825 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1826 break;
1828 default:
1829 gcc_unreachable ();
1831 if (opc)
1832 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1833 operands[1] = op1;
1838 /* Implement the canonicalize_comparison target hook for the combine
1839 pass. For the target hook this function is invoked via
1840 sh_canonicalize_comparison. This function is also re-used to
1841 canonicalize comparisons in cbranch pattern expanders. */
1842 static void
1843 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1844 enum machine_mode mode,
1845 bool op0_preserve_value)
1847 /* When invoked from within the combine pass the mode is not specified,
1848 so try to get it from one of the operands. */
1849 if (mode == VOIDmode)
1850 mode = GET_MODE (op0);
1851 if (mode == VOIDmode)
1852 mode = GET_MODE (op1);
1854 // We need to have a mode to do something useful here.
1855 if (mode == VOIDmode)
1856 return;
1858 // Currently, we don't deal with floats here.
1859 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1860 return;
1862 // Make sure that the constant operand is the second operand.
1863 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1865 if (op0_preserve_value)
1866 return;
1868 std::swap (op0, op1);
1869 cmp = swap_condition (cmp);
1872 if (CONST_INT_P (op1))
1874 /* Try to adjust the constant operand in such a way that available
1875 comparison insns can be utilized better and the constant can be
1876 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1877 constant pool. */
1878 const HOST_WIDE_INT val = INTVAL (op1);
1880 /* x > -1 --> x >= 0
1881 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1882 x <= -1 --> x < 0
1883 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1884 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1886 cmp = cmp == GT ? GE : LT;
1887 op1 = gen_int_mode (val + 1, mode);
1890 /* x >= 1 --> x > 0
1891 x >= 0x80 --> x > 0x7F
1892 x < 1 --> x <= 0
1893 x < 0x80 --> x <= 0x7F */
1894 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1896 cmp = cmp == GE ? GT : LE;
1897 op1 = gen_int_mode (val - 1, mode);
1900 /* unsigned x >= 1 --> x != 0
1901 unsigned x < 1 --> x == 0 */
1902 else if (val == 1 && (cmp == GEU || cmp == LTU))
1904 cmp = cmp == GEU ? NE : EQ;
1905 op1 = CONST0_RTX (mode);
1908 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1909 unsigned x < 0x80 --> unsigned x < 0x7F */
1910 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1912 cmp = cmp == GEU ? GTU : LEU;
1913 op1 = gen_int_mode (val - 1, mode);
1916 /* unsigned x > 0 --> x != 0
1917 unsigned x <= 0 --> x == 0 */
1918 else if (val == 0 && (cmp == GTU || cmp == LEU))
1919 cmp = cmp == GTU ? NE : EQ;
1921 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1922 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1923 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1924 && val == 0x7FFFFFFF)
1926 cmp = cmp == GTU ? LT : GE;
1927 op1 = const0_rtx;
1930 /* unsigned x >= 0x80000000 --> signed x < 0
1931 unsigned x < 0x80000000 --> signed x >= 0 */
1932 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1933 && (unsigned HOST_WIDE_INT)val
1934 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1936 cmp = cmp == GEU ? LT : GE;
1937 op1 = const0_rtx;
1942 /* This function implements the canonicalize_comparison target hook.
1943 This wrapper around the internally used sh_canonicalize_comparison
1944 function is needed to do the enum rtx_code <-> int conversion.
1945 Target hooks cannot use enum rtx_code in their definitions. */
1946 static void
1947 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1948 bool op0_preserve_value)
1950 enum rtx_code tmp_code = (enum rtx_code)*code;
1951 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1952 VOIDmode, op0_preserve_value);
1953 *code = (int)tmp_code;
1956 bool
1957 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1959 *p1 = T_REG;
1960 *p2 = INVALID_REGNUM;
1961 return true;
1964 enum rtx_code
1965 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1966 enum rtx_code comparison)
1968 /* The scratch reg is only available when this is invoked from within
1969 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1970 rtx scratch = NULL_RTX;
1972 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1973 comparison = GET_CODE (operands[0]);
1974 else
1975 scratch = operands[4];
1977 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1978 mode, false);
1980 /* Notice that this function is also invoked after reload by
1981 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1982 rtx op1 = operands[1];
1984 if (can_create_pseudo_p ())
1985 operands[1] = force_reg (mode, op1);
1986 /* When we are handling DImode comparisons, we want to keep constants so
1987 that we can optimize the component comparisons; however, memory loads
1988 are better issued as a whole so that they can be scheduled well.
1989 SImode equality comparisons allow I08 constants, but only when they
1990 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1991 into a register, that register might as well be r0, and we allow the
1992 constant. If it is already in a register, this is likely to be
1993 allocated to a different hard register, thus we load the constant into
1994 a register unless it is zero. */
1995 if (!REG_P (operands[2])
1996 && (!CONST_INT_P (operands[2])
1997 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1998 && ((comparison != EQ && comparison != NE)
1999 || (REG_P (op1) && REGNO (op1) != R0_REG)
2000 || !satisfies_constraint_I08 (operands[2])))))
2002 if (scratch && GET_MODE (scratch) == mode)
2004 emit_move_insn (scratch, operands[2]);
2005 operands[2] = scratch;
2007 else if (can_create_pseudo_p ())
2008 operands[2] = force_reg (mode, operands[2]);
2010 return comparison;
2013 void
2014 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2016 rtx (*branch_expander) (rtx) = gen_branch_true;
2017 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2018 switch (comparison)
2020 case NE: case LT: case LE: case LTU: case LEU:
2021 comparison = reverse_condition (comparison);
2022 branch_expander = gen_branch_false;
2023 default: ;
2025 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2026 gen_rtx_fmt_ee (comparison, SImode,
2027 operands[1], operands[2])));
2028 rtx jump = emit_jump_insn (branch_expander (operands[3]));
2029 if (probability >= 0)
2030 add_int_reg_note (jump, REG_BR_PROB, probability);
2033 /* ??? How should we distribute probabilities when more than one branch
2034 is generated? So far we only have some ad-hoc observations:
2035 - If the operands are random, they are likely to differ in both parts.
2036 - If comparing items in a hash chain, the operands are random or equal;
2037 operation should be EQ or NE.
2038 - If items are searched in an ordered tree from the root, we can expect
2039 the highpart to be unequal about half of the time; operation should be
2040 an inequality comparison, operands non-constant, and overall probability
2041 about 50%. Likewise for quicksort.
2042 - Range checks will often be made against constants. Even if we assume for
2043 simplicity an even distribution of the non-constant operand over a
2044 sub-range here, the same probability could be generated with differently
2045 wide sub-ranges - as long as the ratio of the part of the subrange that
2046 is before the threshold to the part that comes after the threshold stays
2047 the same. Thus, we can't really tell anything here;
2048 assuming random distribution is at least simple.
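/* In outline, expand_cbranchdi4 below splits a DImode conditional branch
   into SImode comparisons of the high and low words, described by three
   condition codes:
     msw_taken - take the branch if the high words compare this way,
     msw_skip  - skip the low word test if the high words compare this way,
     lsw_taken - take the branch if the low words compare this way.
   For example, a general signed DImode x > y is roughly emitted as
     if (x.hi > y.hi)            goto target;   (msw_taken = GT)
     if (x.hi < y.hi)            goto skip;     (msw_skip  = LT)
     if (x.lo > y.lo, unsigned)  goto target;   (lsw_taken = GTU)
   skip:
   with the branch probabilities distributed as discussed above.  */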
2050 bool
2051 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2053 enum rtx_code msw_taken, msw_skip, lsw_taken;
2054 rtx skip_label = NULL_RTX;
2055 rtx op1h, op1l, op2h, op2l;
2056 int num_branches;
2057 int prob, rev_prob;
2058 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2059 rtx scratch = operands[4];
2061 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2062 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2063 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2064 op1l = gen_lowpart (SImode, operands[1]);
2065 op2l = gen_lowpart (SImode, operands[2]);
2066 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2067 prob = split_branch_probability;
2068 rev_prob = REG_BR_PROB_BASE - prob;
2069 switch (comparison)
2071 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2072 That costs 1 cycle more when the first branch can be predicted taken,
2073 but saves us mispredicts because only one branch needs prediction.
2074 It also enables generating the cmpeqdi_t-1 pattern. */
2075 case EQ:
2076 if (TARGET_CMPEQDI_T)
2078 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2079 emit_jump_insn (gen_branch_true (operands[3]));
2080 return true;
2082 msw_skip = NE;
2083 lsw_taken = EQ;
2084 if (prob >= 0)
2086 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2087 msw_skip_prob = rev_prob;
2088 if (REG_BR_PROB_BASE <= 65535)
2089 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2090 else
2092 lsw_taken_prob
2093 = (prob
2094 ? (REG_BR_PROB_BASE
2095 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2096 / ((gcov_type) prob << 32)))
2097 : 0);
2100 break;
2101 case NE:
2102 if (TARGET_CMPEQDI_T)
2104 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2105 emit_jump_insn (gen_branch_false (operands[3]));
2106 return true;
2108 msw_taken = NE;
2109 msw_taken_prob = prob;
2110 lsw_taken = NE;
2111 lsw_taken_prob = 0;
2112 break;
2113 case GTU: case GT:
2114 msw_taken = comparison;
2115 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2116 break;
2117 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2118 msw_skip = swap_condition (msw_taken);
2119 lsw_taken = GTU;
2120 break;
2121 case GEU: case GE:
2122 if (op2l == CONST0_RTX (SImode))
2123 msw_taken = comparison;
2124 else
2126 msw_taken = comparison == GE ? GT : GTU;
2127 msw_skip = swap_condition (msw_taken);
2128 lsw_taken = GEU;
2130 break;
2131 case LTU: case LT:
2132 msw_taken = comparison;
2133 if (op2l == CONST0_RTX (SImode))
2134 break;
2135 msw_skip = swap_condition (msw_taken);
2136 lsw_taken = LTU;
2137 break;
2138 case LEU: case LE:
2139 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2140 msw_taken = comparison;
2141 else
2143 lsw_taken = LEU;
2144 if (comparison == LE)
2145 msw_taken = LT;
2146 else if (op2h != CONST0_RTX (SImode))
2147 msw_taken = LTU;
2148 else
2150 msw_skip = swap_condition (LTU);
2151 break;
2153 msw_skip = swap_condition (msw_taken);
2155 break;
2156 default: return false;
2158 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2159 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2160 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2161 if (comparison != EQ && comparison != NE && num_branches > 1)
2163 if (!CONSTANT_P (operands[2])
2164 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2165 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2167 msw_taken_prob = prob / 2U;
2168 msw_skip_prob
2169 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2170 lsw_taken_prob = prob;
2172 else
2174 msw_taken_prob = prob;
2175 msw_skip_prob = REG_BR_PROB_BASE;
2176 /* ??? If we have a constant op2h, should we use that when
2177 calculating lsw_taken_prob? */
2178 lsw_taken_prob = prob;
2181 operands[1] = op1h;
2182 operands[2] = op2h;
2183 operands[4] = NULL_RTX;
2184 if (reload_completed
2185 && ! arith_reg_or_0_operand (op2h, SImode)
2186 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2187 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2188 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2190 emit_move_insn (scratch, operands[2]);
2191 operands[2] = scratch;
2193 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2194 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2195 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2197 rtx taken_label = operands[3];
2199 /* Operands were possibly modified, but msw_skip doesn't expect this.
2200 Always use the original ones. */
2201 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2203 operands[1] = op1h;
2204 operands[2] = op2h;
2205 if (reload_completed
2206 && ! arith_reg_or_0_operand (op2h, SImode)
2207 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2209 emit_move_insn (scratch, operands[2]);
2210 operands[2] = scratch;
2214 operands[3] = skip_label = gen_label_rtx ();
2215 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2216 operands[3] = taken_label;
2218 operands[1] = op1l;
2219 operands[2] = op2l;
2220 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2222 if (reload_completed
2223 && ! arith_reg_or_0_operand (op2l, SImode)
2224 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2226 emit_move_insn (scratch, operands[2]);
2227 operands[2] = scratch;
2229 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2231 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2232 emit_label (skip_label);
2233 return true;
2236 /* Given an operand, return 1 if the evaluated operand plugged into an
2237 if_then_else will result in a branch_true, 0 if branch_false, or
2238 -1 if neither applies. The truth table goes like this:
2240 op | cmpval | code | result
2241 ---------+--------+---------+--------------------
2242 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2243 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2244 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2245 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2246 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2247 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2248 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2249 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2251 sh_eval_treg_value (rtx op)
2253 if (t_reg_operand (op, GET_MODE (op)))
2254 return 1;
2255 if (negt_reg_operand (op, GET_MODE (op)))
2256 return 0;
2258 rtx_code code = GET_CODE (op);
2259 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2260 return -1;
2262 int cmpop = code == EQ ? 1 : 0;
2263 int cmpval = INTVAL (XEXP (op, 1));
2264 if (cmpval != 0 && cmpval != 1)
2265 return -1;
2267 int t;
2268 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2269 t = 0;
2270 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2271 t = 1;
2272 else
2273 return -1;
2275 return t ^ (cmpval == cmpop);
2278 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2280 static void
2281 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2283 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2285 insn = gen_rtx_PARALLEL (VOIDmode,
2286 gen_rtvec (2, insn,
2287 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2288 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2290 else
2291 emit_insn (insn);
2294 /* Prepare the operands for an scc instruction; make sure that the
2295 compare has been done and the result is in T_REG. */
2296 void
2297 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2299 rtx t_reg = get_t_reg_rtx ();
2300 enum rtx_code oldcode = code;
2301 enum machine_mode mode;
2303 /* First need a compare insn. */
2304 switch (code)
2306 case NE:
2307 /* It isn't possible to handle this case. */
2308 gcc_unreachable ();
2309 case LT:
2310 code = GT;
2311 break;
2312 case LE:
2313 code = GE;
2314 break;
2315 case LTU:
2316 code = GTU;
2317 break;
2318 case LEU:
2319 code = GEU;
2320 break;
2321 default:
2322 break;
2324 if (code != oldcode)
2326 rtx tmp = op0;
2327 op0 = op1;
2328 op1 = tmp;
2331 mode = GET_MODE (op0);
2332 if (mode == VOIDmode)
2333 mode = GET_MODE (op1);
2335 op0 = force_reg (mode, op0);
2336 if ((code != EQ && code != NE
2337 && (op1 != const0_rtx
2338 || code == GTU || code == GEU || code == LTU || code == LEU))
2339 || (mode == DImode && op1 != const0_rtx)
2340 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2341 op1 = force_reg (mode, op1);
2343 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2344 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2345 mode);
2349 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2350 rtx op0, rtx op1)
2352 rtx target = gen_reg_rtx (SImode);
2353 rtx tmp;
2355 gcc_assert (TARGET_SHMEDIA);
2356 switch (code)
2358 case EQ:
2359 case GT:
2360 case LT:
2361 case UNORDERED:
2362 case GTU:
2363 case LTU:
2364 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2365 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2366 code = NE;
2367 break;
2369 case NE:
2370 case GE:
2371 case LE:
2372 case ORDERED:
2373 case GEU:
2374 case LEU:
2375 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2376 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2377 code = EQ;
2378 break;
2380 case UNEQ:
2381 case UNGE:
2382 case UNGT:
2383 case UNLE:
2384 case UNLT:
2385 case LTGT:
2386 return NULL_RTX;
2388 default:
2389 gcc_unreachable ();
2392 if (mode == DImode)
2394 rtx t2 = gen_reg_rtx (DImode);
2395 emit_insn (gen_extendsidi2 (t2, target));
2396 target = t2;
2399 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2402 /* Called from the md file, set up the operands of a compare instruction. */
2403 void
2404 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2406 enum rtx_code code = GET_CODE (operands[0]);
2407 enum rtx_code branch_code;
2408 rtx op0 = operands[1];
2409 rtx op1 = operands[2];
2410 rtx insn, tem;
2411 bool need_ccmpeq = false;
2413 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2415 op0 = force_reg (mode, op0);
2416 op1 = force_reg (mode, op1);
2418 else
2420 if (code != EQ || mode == DImode)
2422 /* Force args into regs, since we can't use constants here. */
2423 op0 = force_reg (mode, op0);
2424 if (op1 != const0_rtx || code == GTU || code == GEU)
2425 op1 = force_reg (mode, op1);
2429 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2431 if (code == LT
2432 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2433 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2435 tem = op0, op0 = op1, op1 = tem;
2436 code = swap_condition (code);
2439 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2440 if (code == GE)
2442 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2443 need_ccmpeq = true;
2444 code = GT;
2447 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2448 to EQ/GT respectively. */
2449 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2452 switch (code)
2454 case EQ:
2455 case GT:
2456 case GE:
2457 case GTU:
2458 case GEU:
2459 branch_code = code;
2460 break;
2461 case NE:
2462 case LT:
2463 case LE:
2464 case LTU:
2465 case LEU:
2466 branch_code = reverse_condition (code);
2467 break;
2468 default:
2469 gcc_unreachable ();
2472 insn = gen_rtx_SET (VOIDmode,
2473 get_t_reg_rtx (),
2474 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2476 sh_emit_set_t_insn (insn, mode);
2477 if (need_ccmpeq)
2478 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2480 if (branch_code == code)
2481 emit_jump_insn (gen_branch_true (operands[3]));
2482 else
2483 emit_jump_insn (gen_branch_false (operands[3]));
2486 void
2487 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2489 enum rtx_code code = GET_CODE (operands[1]);
2490 rtx op0 = operands[2];
2491 rtx op1 = operands[3];
2492 rtx lab = NULL_RTX;
2493 bool invert = false;
2494 rtx tem;
2496 op0 = force_reg (mode, op0);
2497 if ((code != EQ && code != NE
2498 && (op1 != const0_rtx
2499 || code == GTU || code == GEU || code == LTU || code == LEU))
2500 || (mode == DImode && op1 != const0_rtx)
2501 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2502 op1 = force_reg (mode, op1);
2504 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2506 if (code == LT || code == LE)
2508 code = swap_condition (code);
2509 tem = op0, op0 = op1, op1 = tem;
2511 if (code == GE)
2513 if (TARGET_IEEE)
2515 lab = gen_label_rtx ();
2516 sh_emit_scc_to_t (EQ, op0, op1);
2517 emit_jump_insn (gen_branch_true (lab));
2518 code = GT;
2520 else
2522 code = LT;
2523 invert = true;
2528 if (code == NE)
2530 code = EQ;
2531 invert = true;
2534 sh_emit_scc_to_t (code, op0, op1);
2535 if (lab)
2536 emit_label (lab);
2537 if (invert)
2538 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2539 else
2540 emit_move_insn (operands[0], get_t_reg_rtx ());
2543 /* Functions to output assembly code. */
2545 /* Return a sequence of instructions to perform DI or DF move.
2547 Since the SH cannot move a DI or DF in one instruction, we have
2548 to take care when we see overlapping source and dest registers. */
2549 const char *
2550 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2551 enum machine_mode mode)
2553 rtx dst = operands[0];
2554 rtx src = operands[1];
2556 if (MEM_P (dst)
2557 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2558 return "mov.l %T1,%0" "\n"
2559 " mov.l %1,%0";
2561 if (register_operand (dst, mode)
2562 && register_operand (src, mode))
2564 if (REGNO (src) == MACH_REG)
2565 return "sts mach,%S0" "\n"
2566 " sts macl,%R0";
2568 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2569 when mov.d r1,r0 do r1->r0 then r2->r1. */
2570 if (REGNO (src) + 1 == REGNO (dst))
2571 return "mov %T1,%T0" "\n"
2572 " mov %1,%0";
2573 else
2574 return "mov %1,%0" "\n"
2575 " mov %T1,%T0";
2577 else if (CONST_INT_P (src))
2579 if (INTVAL (src) < 0)
2580 output_asm_insn ("mov #-1,%S0", operands);
2581 else
2582 output_asm_insn ("mov #0,%S0", operands);
2584 return "mov %1,%R0";
2586 else if (MEM_P (src))
2588 int ptrreg = -1;
2589 int dreg = REGNO (dst);
2590 rtx inside = XEXP (src, 0);
2592 switch (GET_CODE (inside))
2594 case REG:
2595 ptrreg = REGNO (inside);
2596 break;
2598 case SUBREG:
2599 ptrreg = subreg_regno (inside);
2600 break;
2602 case PLUS:
2603 ptrreg = REGNO (XEXP (inside, 0));
2604 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2605 an offsettable address. Unfortunately, offsettable addresses use
2606 QImode to check the offset, and a QImode offsettable address
2607 requires r0 for the other operand, which is not currently
2608 supported, so we can't use the 'o' constraint.
2609 Thus we must check for and handle r0+REG addresses here.
2610 We punt for now, since this is likely very rare. */
2611 gcc_assert (!REG_P (XEXP (inside, 1)));
2612 break;
2614 case LABEL_REF:
2615 return "mov.l %1,%0" "\n"
2616 " mov.l %1+4,%T0";
2617 case POST_INC:
2618 return "mov.l %1,%0" "\n"
2619 " mov.l %1,%T0";
2620 default:
2621 gcc_unreachable ();
2624 /* Work out the safe way to copy. Copy into the second half first. */
2625 if (dreg == ptrreg)
2626 return "mov.l %T1,%T0" "\n"
2627 " mov.l %1,%0";
2630 return "mov.l %1,%0" "\n"
2631 " mov.l %T1,%T0";
2634 /* Print an instruction which would have gone into a delay slot after
2635 another instruction, but couldn't because the other instruction expanded
2636 into a sequence where putting the slot insn at the end wouldn't work. */
2637 static void
2638 print_slot (rtx insn)
2640 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2642 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2645 const char *
2646 output_far_jump (rtx insn, rtx op)
2648 struct { rtx lab, reg, op; } this_jmp;
2649 rtx braf_base_lab = NULL_RTX;
2650 const char *jump;
2651 int far;
2652 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2653 rtx prev;
2655 this_jmp.lab = gen_label_rtx ();
2657 if (TARGET_SH2
2658 && offset >= -32764
2659 && offset - get_attr_length (insn) <= 32766)
2661 far = 0;
2662 jump = "mov.w %O0,%1" "\n"
2663 " braf %1";
2665 else
2667 far = 1;
2668 if (flag_pic)
2670 if (TARGET_SH2)
2671 jump = "mov.l %O0,%1" "\n"
2672 " braf %1";
2673 else
2674 jump = "mov.l r0,@-r15" "\n"
2675 " mova %O0,r0" "\n"
2676 " mov.l @r0,%1" "\n"
2677 " add r0,%1" "\n"
2678 " mov.l @r15+,r0" "\n"
2679 " jmp @%1";
2681 else
2682 jump = "mov.l %O0,%1" "\n"
2683 " jmp @%1";
2685 /* If we have a scratch register available, use it. */
2686 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2687 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2689 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2690 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2691 jump = "mov.l r1,@-r15" "\n"
2692 " mova %O0,r0" "\n"
2693 " mov.l @r0,r1" "\n"
2694 " add r1,r0" "\n"
2695 " mov.l @r15+,r1" "\n"
2696 " jmp @%1";
2697 output_asm_insn (jump, &this_jmp.lab);
2698 if (dbr_sequence_length ())
2699 print_slot (final_sequence);
2700 else
2701 output_asm_insn ("nop", 0);
2703 else
2705 /* Output the delay slot insn first if any. */
2706 if (dbr_sequence_length ())
2707 print_slot (final_sequence);
2709 this_jmp.reg = gen_rtx_REG (SImode, 13);
2710 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2711 Fortunately, MACL is fixed and call-clobbered, and we never
2712 need its value across jumps, so save r13 in it instead of on
2713 the stack. */
2714 if (TARGET_SH5)
2715 output_asm_insn ("lds r13,macl", 0);
2716 else
2717 output_asm_insn ("mov.l r13,@-r15", 0);
2718 output_asm_insn (jump, &this_jmp.lab);
2719 if (TARGET_SH5)
2720 output_asm_insn ("sts macl,r13", 0);
2721 else
2722 output_asm_insn ("mov.l @r15+,r13", 0);
2724 if (far && flag_pic && TARGET_SH2)
2726 braf_base_lab = gen_label_rtx ();
2727 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2728 CODE_LABEL_NUMBER (braf_base_lab));
2730 if (far)
2731 output_asm_insn (".align 2", 0);
2732 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2733 this_jmp.op = op;
2734 if (far && flag_pic)
2736 if (TARGET_SH2)
2737 this_jmp.lab = braf_base_lab;
2738 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2740 else
2741 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2742 return "";
2745 /* Local label counter, used for constants in the pool and inside
2746 pattern branches. */
2747 static int lf = 100;
2749 /* Output code for ordinary branches. */
2750 const char *
2751 output_branch (int logic, rtx insn, rtx *operands)
2753 switch (get_attr_length (insn))
2755 case 6:
2756 /* This can happen if filling the delay slot has caused a forward
2757 branch to exceed its range (we could reverse it, but only
2758 when we know we won't overextend other branches; this should
2759 best be handled by relaxation).
2760 It can also happen when other condbranches hoist delay slot insns
2761 from their destinations, thus leading to code size increase.
2762 But the branch will still be in the range -4092..+4098 bytes. */
2763 if (! TARGET_RELAX)
2765 int label = lf++;
2766 /* The call to print_slot will clobber the operands. */
2767 rtx op0 = operands[0];
2769 /* If the instruction in the delay slot is annulled (true), then
2770 there is no delay slot where we can put it now. The only safe
2771 place for it is after the label. final will do that by default. */
2773 if (final_sequence
2774 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2775 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2777 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2778 ASSEMBLER_DIALECT ? "/" : ".", label);
2779 print_slot (final_sequence);
2781 else
2782 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2784 output_asm_insn ("bra\t%l0", &op0);
2785 fprintf (asm_out_file, "\tnop\n");
2786 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2788 return "";
2790 /* When relaxing, handle this like a short branch. The linker
2791 will fix it up if it still doesn't fit after relaxation. */
2792 case 2:
2793 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2795 /* These are for SH2e, in which we have to account for the
2796 extra nop because of the hardware bug in annulled branches. */
2797 case 8:
2798 if (! TARGET_RELAX)
2800 int label = lf++;
2802 gcc_assert (!final_sequence
2803 || !(INSN_ANNULLED_BRANCH_P
2804 (XVECEXP (final_sequence, 0, 0))));
2805 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2806 logic ? "f" : "t",
2807 ASSEMBLER_DIALECT ? "/" : ".", label);
2808 fprintf (asm_out_file, "\tnop\n");
2809 output_asm_insn ("bra\t%l0", operands);
2810 fprintf (asm_out_file, "\tnop\n");
2811 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2813 return "";
2815 /* When relaxing, fall through. */
2816 case 4:
2818 char buffer[10];
2820 sprintf (buffer, "b%s%ss\t%%l0",
2821 logic ? "t" : "f",
2822 ASSEMBLER_DIALECT ? "/" : ".");
2823 output_asm_insn (buffer, &operands[0]);
2824 return "nop";
2827 default:
2828 /* There should be no longer branches now - that would
2829 indicate that something has destroyed the branches set
2830 up in machine_dependent_reorg. */
2831 gcc_unreachable ();
2835 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2836 fill in operands[9] as a label to the successor insn.
2837 We try to use jump threading where possible.
2838 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2839 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2840 follow jmp and bt, if the address is in range. */
2841 const char *
2842 output_branchy_insn (enum rtx_code code, const char *templ,
2843 rtx insn, rtx *operands)
2845 rtx next_insn = NEXT_INSN (insn);
2847 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2849 rtx src = SET_SRC (PATTERN (next_insn));
2850 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2852 /* Following branch not taken */
2853 operands[9] = gen_label_rtx ();
2854 emit_label_after (operands[9], next_insn);
2855 INSN_ADDRESSES_NEW (operands[9],
2856 INSN_ADDRESSES (INSN_UID (next_insn))
2857 + get_attr_length (next_insn));
2858 return templ;
2860 else
2862 int offset = (branch_dest (next_insn)
2863 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2864 if (offset >= -252 && offset <= 258)
2866 if (GET_CODE (src) == IF_THEN_ELSE)
2867 /* branch_true */
2868 src = XEXP (src, 1);
2869 operands[9] = src;
2870 return templ;
2874 operands[9] = gen_label_rtx ();
2875 emit_label_after (operands[9], insn);
2876 INSN_ADDRESSES_NEW (operands[9],
2877 INSN_ADDRESSES (INSN_UID (insn))
2878 + get_attr_length (insn));
2879 return templ;
2882 const char *
2883 output_ieee_ccmpeq (rtx insn, rtx *operands)
2885 return output_branchy_insn (NE, "bt %l9" "\n"
2886 " fcmp/eq %1,%0",
2887 insn, operands);
2890 /* Output the start of the assembler file. */
2891 static void
2892 sh_file_start (void)
2894 default_file_start ();
2896 if (TARGET_ELF)
2897 /* We need to show the text section with the proper
2898 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2899 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2900 will complain. We can teach GAS specifically about the
2901 default attributes for our choice of text section, but
2902 then we would have to change GAS again if/when we change
2903 the text section name. */
2904 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2905 else
2906 /* Switch to the data section so that the coffsem symbol
2907 isn't in the text section. */
2908 switch_to_section (data_section);
2910 if (TARGET_LITTLE_ENDIAN)
2911 fputs ("\t.little\n", asm_out_file);
2913 if (!TARGET_ELF)
2915 if (TARGET_SHCOMPACT)
2916 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2917 else if (TARGET_SHMEDIA)
2918 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2919 TARGET_SHMEDIA64 ? 64 : 32);
2923 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2924 static bool
2925 unspec_caller_rtx_p (rtx pat)
2927 rtx base, offset;
2928 int i;
2930 split_const (pat, &base, &offset);
2931 if (GET_CODE (base) == UNSPEC)
2933 if (XINT (base, 1) == UNSPEC_CALLER)
2934 return true;
2935 for (i = 0; i < XVECLEN (base, 0); i++)
2936 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2937 return true;
2939 return false;
2942 /* Indicate that INSN cannot be duplicated. This is true for an insn
2943 that generates a unique label. */
2944 static bool
2945 sh_cannot_copy_insn_p (rtx insn)
2947 rtx pat;
2949 if (!reload_completed || !flag_pic)
2950 return false;
2952 if (!NONJUMP_INSN_P (insn))
2953 return false;
2954 if (asm_noperands (insn) >= 0)
2955 return false;
2957 pat = PATTERN (insn);
2958 if (GET_CODE (pat) != SET)
2959 return false;
2960 pat = SET_SRC (pat);
2962 if (unspec_caller_rtx_p (pat))
2963 return true;
2965 return false;
2968 /* Number of instructions used to make an arithmetic right shift by N. */
2969 static const char ashiftrt_insns[] =
2970 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2972 /* Description of a logical left or right shift, when expanded to a sequence
2973 of 1/2/8/16 shifts.
2974 Notice that one bit right shifts clobber the T bit. One bit left shifts
2975 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
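/* Reading the tables below: entry N gives the insn count and the individual
   shift amounts used to synthesize a shift by N out of 1/2/8/16 bit shift
   insns.  For example, entry 10 is { 2, { 8, 2 }, 0 }, so a left shift by
   10 becomes shll8 + shll2 and a logical right shift by 10 becomes
   shlr8 + shlr2.  A negative amount means a shift in the opposite direction
   (see gen_ashift); e.g. entry 14, { 3, { 8, -2, 8 }, 0 }, performs a left
   shift by 14 as shll8, shlr2, shll8 - the bits dropped by the intermediate
   right shift would have been shifted out anyway.  */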
2976 enum
2978 ASHL_CLOBBERS_T = 1 << 0,
2979 LSHR_CLOBBERS_T = 1 << 1
2982 struct ashl_lshr_sequence
2984 char insn_count;
2985 char amount[6];
2986 char clobbers_t;
2989 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2991 { 0, { 0 }, 0 }, // 0
2992 { 1, { 1 }, LSHR_CLOBBERS_T },
2993 { 1, { 2 }, 0 },
2994 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2995 { 2, { 2, 2 }, 0 }, // 4
2996 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2997 { 3, { 2, 2, 2 }, 0 },
2998 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2999 { 1, { 8 }, 0 }, // 8
3000 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3001 { 2, { 8, 2 }, 0 },
3002 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3003 { 3, { 8, 2, 2 }, 0 }, // 12
3004 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3005 { 3, { 8, -2, 8 }, 0 },
3006 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3007 { 1, { 16 }, 0 }, // 16
3008 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3009 { 2, { 16, 2 }, 0 },
3010 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3011 { 3, { 16, 2, 2 }, 0 }, // 20
3012 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3013 { 3, { 16, -2, 8 }, 0 },
3014 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3015 { 2, { 16, 8 }, 0 }, // 24
3016 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3017 { 3, { 16, 8, 2 }, 0 },
3018 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3019 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3020 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3021 { 3, { 16, -2, 16 }, 0 },
3023 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3024 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3025 However, the shift-and combiner code needs this entry here to be in
3026 terms of real shift insns. */
3027 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3030 /* Individual shift amounts for shift amounts < 16, where up to three highmost
3031 bits might be clobbered. This is typically used when combined with some
3032 kind of sign or zero extension. */
3033 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3035 { 0, { 0 }, 0 }, // 0
3036 { 1, { 1 }, LSHR_CLOBBERS_T },
3037 { 1, { 2 }, 0 },
3038 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3039 { 2, { 2, 2 }, 0 }, // 4
3040 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3041 { 2, { 8, -2 }, 0 },
3042 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3043 { 1, { 8 }, 0 }, // 8
3044 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3045 { 2, { 8, 2 }, 0 },
3046 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3047 { 3, { 8, 2, 2 }, 0 }, // 12
3048 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3049 { 2, { 16, -2 }, 0 },
3050 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3051 { 1, { 16 }, 0 }, // 16
3052 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3053 { 2, { 16, 2 }, 0 },
3054 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3055 { 3, { 16, 2, 2 }, 0 }, // 20
3056 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3057 { 3, { 16, -2, 8 }, 0 },
3058 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3059 { 2, { 16, 8 }, 0 }, // 24
3060 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3061 { 3, { 16, 8, 2 }, 0 },
3062 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3063 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3064 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3065 { 3, { 16, -2, 16 }, 0 },
3066 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3069 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3070 will clobber the T bit. */
3071 bool
3072 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3074 gcc_assert (CONST_INT_P (shift_amount));
3076 const int shift_amount_i = INTVAL (shift_amount) & 31;
3078 /* Special case for shift count of 31: use and-rotl sequence. */
3079 if (shift_amount_i == 31)
3080 return true;
3082 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3083 & ASHL_CLOBBERS_T) != 0;
3086 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3087 instructions will clobber the T bit. */
3088 bool
3089 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3091 gcc_assert (CONST_INT_P (shift_amount));
3093 const int shift_amount_i = INTVAL (shift_amount) & 31;
3095 /* Special case for shift count of 31: use shll-movt sequence. */
3096 if (shift_amount_i == 31)
3097 return true;
3099 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3100 & LSHR_CLOBBERS_T) != 0;
3103 /* Return true if it is potentially beneficial to use a dynamic shift
3104 instruction (shad / shar) instead of a combination of 1/2/8/16
3105 shift instructions for the specified shift count.
3106 If dynamic shifts are not available, always return false. */
3107 bool
3108 sh_dynamicalize_shift_p (rtx count)
3110 gcc_assert (CONST_INT_P (count));
3112 const int shift_amount_i = INTVAL (count) & 31;
3113 int insn_count;
3115 /* For left and right shifts, there are shorter 2 insn sequences for
3116 shift amounts of 31. */
3117 if (shift_amount_i == 31)
3118 insn_count = 2;
3119 else
3120 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3122 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3125 /* Assuming we have a value that has been sign-extended by at least one bit,
3126 can we use the ext_ashl_lshr_seq amounts with the last shift turned to an
3127 arithmetic shift to shift it by N without data loss, and quicker than by
3128 other means? */
3129 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
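/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15; the
   ext_ashl_lshr_seq entries for those counts end in a one bit right
   shift, which is the shift that gets turned into an arithmetic one.  */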
3131 /* Return the cost of a shift. */
3132 static inline int
3133 shiftcosts (rtx x)
3135 int value;
3137 if (TARGET_SHMEDIA)
3138 return 1;
3140 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3142 if (GET_MODE (x) == DImode
3143 && CONST_INT_P (XEXP (x, 1))
3144 && INTVAL (XEXP (x, 1)) == 1)
3145 return 2;
3147 /* Everything else is invalid, because there is no pattern for it. */
3148 return -1;
3150 /* If shifting by a non-constant, then this will be expensive. */
3151 if (!CONST_INT_P (XEXP (x, 1)))
3152 return SH_DYNAMIC_SHIFT_COST;
3154 /* Otherwise, return the true cost in instructions. Cope with out of range
3155 shift counts more or less arbitrarily. */
3156 value = INTVAL (XEXP (x, 1)) & 31;
3158 if (GET_CODE (x) == ASHIFTRT)
3160 int cost = ashiftrt_insns[value];
3161 /* If dynamic shifts are available and profitable in this case, then we
3162 put the constant in a reg and use shad. */
3163 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3164 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3165 return cost;
3167 else
3168 return ashl_lshr_seq[value].insn_count;
3171 /* Return the cost of an AND/XOR/IOR operation. */
3172 static inline int
3173 and_xor_ior_costs (rtx x, int code)
3175 /* On SH1-4 we have only max. SImode operations.
3176 Double the cost for modes > SImode. */
3177 const int cost_scale = !TARGET_SHMEDIA
3178 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3179 ? 2 : 1;
3181 /* A logical operation with two registers is a single cycle
3182 instruction. */
3183 if (!CONST_INT_P (XEXP (x, 1)))
3184 return 1 * cost_scale;
3186 int i = INTVAL (XEXP (x, 1));
3188 if (TARGET_SHMEDIA)
3190 if (satisfies_constraint_I10 (XEXP (x, 1))
3191 || satisfies_constraint_J16 (XEXP (x, 1)))
3192 return 1;
3193 else
3194 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3197 /* These constants are single cycle extu.[bw] instructions. */
3198 if ((i == 0xff || i == 0xffff) && code == AND)
3199 return 1 * cost_scale;
3200 /* Constants that can be used in an instruction as an immediate are
3201 a single cycle, but this requires r0, so make it a little more
3202 expensive. */
3203 if (CONST_OK_FOR_K08 (i))
3204 return 2 * cost_scale;
3205 /* Constants that can be loaded with a mov immediate need one more cycle.
3206 This case is probably unnecessary. */
3207 if (CONST_OK_FOR_I08 (i))
3208 return 2 * cost_scale;
3209 /* Any other constant requires an additional 2 cycle pc-relative load.
3210 This case is probably unnecessary. */
3211 return 3 * cost_scale;
3214 /* Return the cost of an addition or a subtraction. */
3215 static inline int
3216 addsubcosts (rtx x)
3218 if (GET_MODE (x) == SImode)
3220 /* The addc or subc patterns will eventually become one or two
3221 instructions. Below are some costs for some of the patterns
3222 which combine would reject because the costs of the individual
3223 insns in the patterns are lower.
3225 FIXME: It would be much easier if we had something like insn cost
3226 attributes and the cost calculation machinery used those attributes
3227 in the first place. This would eliminate redundant recog-like C
3228 code to calculate costs of complex patterns. */
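/* The checks below match RTL shapes such as
     (plus (and (reg) (const_int 1)) (plus (reg) (reg)))
     (plus (and (reg) (const_int 1)) (mult (reg) (const_int 2)))
     (plus (mult (reg) (const_int 2)) (lshiftrt (reg) (const_int 31)))
   and give them a cost of one insn so that combine accepts the
   corresponding addc / T-bit addition patterns.  */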
3229 rtx op0 = XEXP (x, 0);
3230 rtx op1 = XEXP (x, 1);
3232 if (GET_CODE (x) == PLUS)
3234 if (GET_CODE (op0) == AND
3235 && XEXP (op0, 1) == const1_rtx
3236 && (GET_CODE (op1) == PLUS
3237 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3238 return 1;
3240 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3241 && GET_CODE (op1) == LSHIFTRT
3242 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3243 return 1;
3247 /* On SH1-4 we have only max. SImode operations.
3248 Double the cost for modes > SImode. */
3249 const int cost_scale = !TARGET_SHMEDIA
3250 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3251 ? 2 : 1;
3253 /* Adding a register is a single cycle insn. */
3254 if (REG_P (XEXP (x, 1))
3255 || GET_CODE (XEXP (x, 1)) == SUBREG)
3256 return 1 * cost_scale;
3258 /* Likewise for small constants. */
3259 if (CONST_INT_P (XEXP (x, 1))
3260 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3261 return 1 * cost_scale;
3263 if (TARGET_SHMEDIA)
3264 switch (GET_CODE (XEXP (x, 1)))
3266 case CONST:
3267 case LABEL_REF:
3268 case SYMBOL_REF:
3269 return TARGET_SHMEDIA64 ? 5 : 3;
3271 case CONST_INT:
3272 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3273 return 2;
3274 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3275 return 3;
3276 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3277 return 4;
3279 /* Fall through. */
3280 default:
3281 return 5;
3284 /* Any other constant requires a 2 cycle pc-relative load plus an
3285 addition. */
3286 return 3 * cost_scale;
3289 /* Return the cost of a multiply. */
3290 static inline int
3291 multcosts (rtx x ATTRIBUTE_UNUSED)
3293 if (sh_multcost >= 0)
3294 return sh_multcost;
3295 if (TARGET_SHMEDIA)
3296 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3297 accept constants. Ideally, we would use a cost of one or two and
3298 add the cost of the operand, but disregard the latter when inside loops
3299 and loop invariant code motion is still to follow.
3300 Using a multiply first and splitting it later if it's a loss
3301 doesn't work because of different sign / zero extension semantics
3302 of multiplies vs. shifts. */
3303 return optimize_size ? 2 : 3;
3305 if (TARGET_SH2)
3307 /* We have a mul insn, so we can never take more than the mul and the
3308 read of the mac reg, but count more because of the latency and extra
3309 reg usage. */
3310 if (optimize_size)
3311 return 2;
3312 return 3;
3315 /* If we're aiming at small code, then just count the number of
3316 insns in a multiply call sequence. */
3317 if (optimize_size)
3318 return 5;
3320 /* Otherwise count all the insns in the routine we'd be calling too. */
3321 return 20;
3324 /* Compute a (partial) cost for rtx X. Return true if the complete
3325 cost has been computed, and false if subexpressions should be
3326 scanned. In either case, *TOTAL contains the cost result. */
3327 static bool
3328 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3329 int *total, bool speed ATTRIBUTE_UNUSED)
3331 switch (code)
3333 /* The lower-subreg pass decides whether to split multi-word regs
3334 into individual regs by looking at the cost for a SET of certain
3335 modes with the following patterns:
3336 (set (reg) (reg))
3337 (set (reg) (const_int 0))
3338 On machines that support vector-move operations a multi-word move
3339 is the same cost as individual reg move. On SH there is no
3340 vector-move, so we have to provide the correct cost in the number
3341 of move insns to load/store the reg of the mode in question. */
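/* For example, with the formula below a reg-reg SImode set costs one
   insn, a DImode set costs two (two mov.l insns), and a DFmode set
   costs one when double precision moves are available (TARGET_FMOVD or
   SH2A double) and two otherwise.  */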
3342 case SET:
3343 if (register_operand (SET_DEST (x), VOIDmode)
3344 && (register_operand (SET_SRC (x), VOIDmode)
3345 || satisfies_constraint_Z (SET_SRC (x))))
3347 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3348 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3349 / mov_insn_size (mode, TARGET_SH2A));
3350 return true;
3352 return false;
3354 /* The cost of a mem access is mainly the cost of the address mode. */
3355 case MEM:
3356 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3357 true);
3358 return true;
3360 /* The cost of a sign or zero extend depends on whether the source is a
3361 reg or a mem. In case of a mem take the address into account. */
3362 case SIGN_EXTEND:
3363 if (REG_P (XEXP (x, 0)))
3365 *total = COSTS_N_INSNS (1);
3366 return true;
3368 if (MEM_P (XEXP (x, 0)))
3370 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3371 GET_MODE (XEXP (x, 0)),
3372 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3373 return true;
3375 return false;
3377 case ZERO_EXTEND:
3378 if (REG_P (XEXP (x, 0)))
3380 *total = COSTS_N_INSNS (1);
3381 return true;
3383 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3384 && (GET_MODE (XEXP (x, 0)) == QImode
3385 || GET_MODE (XEXP (x, 0)) == HImode))
3387 /* Handle SH2A's movu.b and movu.w insn. */
3388 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3389 GET_MODE (XEXP (x, 0)),
3390 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3391 return true;
3393 return false;
3395 /* mems for SFmode and DFmode can be inside a parallel due to
3396 the way the fpscr is handled. */
3397 case PARALLEL:
3398 for (int i = 0; i < XVECLEN (x, 0); i++)
3400 rtx xx = XVECEXP (x, 0, i);
3401 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3403 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3404 GET_MODE (XEXP (xx, 0)),
3405 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3406 return true;
3408 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3410 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3411 GET_MODE (XEXP (xx, 1)),
3412 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3413 return true;
3417 if (sh_1el_vec (x, VOIDmode))
3418 *total = outer_code != SET;
3419 else if (sh_rep_vec (x, VOIDmode))
3420 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3421 + (outer_code != SET));
3422 else
3423 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3424 return true;
3426 case CONST_INT:
3427 if (TARGET_SHMEDIA)
3429 if (INTVAL (x) == 0)
3430 *total = 0;
3431 else if (outer_code == AND && and_operand ((x), DImode))
3432 *total = 0;
3433 else if ((outer_code == IOR || outer_code == XOR
3434 || outer_code == PLUS)
3435 && CONST_OK_FOR_I10 (INTVAL (x)))
3436 *total = 0;
3437 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3438 *total = COSTS_N_INSNS (outer_code != SET);
3439 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3440 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3441 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3442 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3443 else
3444 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3445 return true;
3447 if (CONST_OK_FOR_I08 (INTVAL (x)))
3448 *total = 0;
3449 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3450 && CONST_OK_FOR_K08 (INTVAL (x)))
3451 *total = 1;
3452 /* prepare_cmp_insn will force costly constants into registers before
3453 the cbranch[sd]i4 patterns can see them, so preserve potentially
3454 interesting ones not covered by I08 above. */
3455 else if (outer_code == COMPARE
3456 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3457 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3458 || INTVAL (x) == 0x7fffffff
3459 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3460 *total = 1;
3461 else
3462 *total = 8;
3463 return true;
3465 case EQ:
3466 /* An and with a constant compared against zero is
3467 most likely going to be a TST #imm, R0 instruction.
3468 Notice that this does not catch the zero_extract variants from
3469 the md file. */
3470 if (GET_CODE (XEXP (x, 0)) == AND
3471 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3473 *total = 1;
3474 return true;
3476 else
3477 return false;
3479 case SMIN:
3480 case SMAX:
3481 /* This is most likely a clips.b or clips.w insn that is being made up
3482 by combine. */
3483 if (TARGET_SH2A
3484 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3485 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3486 && REG_P (XEXP (XEXP (x, 0), 0))
3487 && CONST_INT_P (XEXP (x, 1)))
3489 *total = COSTS_N_INSNS (1);
3490 return true;
3492 else
3493 return false;
3495 case CONST:
3496 case LABEL_REF:
3497 case SYMBOL_REF:
3498 if (TARGET_SHMEDIA64)
3499 *total = COSTS_N_INSNS (4);
3500 else if (TARGET_SHMEDIA32)
3501 *total = COSTS_N_INSNS (2);
3502 else
3503 *total = 5;
3504 return true;
3506 case CONST_DOUBLE:
3507 if (TARGET_SHMEDIA)
3508 *total = COSTS_N_INSNS (4);
3509 /* prepare_cmp_insn will force costly constants into registers before
3510 the cbranchdi4 pattern can see them, so preserve potentially
3511 interesting ones. */
3512 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3513 *total = 1;
3514 else
3515 *total = 10;
3516 return true;
3518 case CONST_VECTOR:
3519 /* FIXME: This looks broken. Only the last statement has any effect.
3520 Probably this could be folded with the PARALLEL case? */
3521 if (x == CONST0_RTX (GET_MODE (x)))
3522 *total = 0;
3523 else if (sh_1el_vec (x, VOIDmode))
3524 *total = outer_code != SET;
3525 if (sh_rep_vec (x, VOIDmode))
3526 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3527 + (outer_code != SET));
3528 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3529 return true;
3531 case PLUS:
3532 case MINUS:
3533 *total = COSTS_N_INSNS (addsubcosts (x));
3534 return true;
3536 case AND:
3537 case XOR:
3538 case IOR:
3539 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3540 return true;
3542 case MULT:
3543 *total = COSTS_N_INSNS (multcosts (x));
3544 return true;
3546 case LT:
3547 case GE:
3548 /* div0s sign comparison. */
3549 if (GET_CODE (XEXP (x, 0)) == XOR
3550 && REG_P ((XEXP (XEXP (x, 0), 0)))
3551 && REG_P ((XEXP (XEXP (x, 0), 1)))
3552 && satisfies_constraint_Z (XEXP (x, 1)))
3554 *total = COSTS_N_INSNS (1);
3555 return true;
3557 else
3558 return false;
3560 case LSHIFTRT:
3561 /* div0s sign comparison. */
3562 if (GET_CODE (XEXP (x, 0)) == XOR
3563 && REG_P ((XEXP (XEXP (x, 0), 0)))
3564 && REG_P ((XEXP (XEXP (x, 0), 1)))
3565 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3567 *total = COSTS_N_INSNS (1);
3568 return true;
3570 /* Fall through to shiftcosts. */
3571 case ASHIFT:
3572 case ASHIFTRT:
3574 int cost = shiftcosts (x);
3575 if (cost < 0)
3576 return false;
3577 *total = COSTS_N_INSNS (cost);
3578 return true;
3581 case DIV:
3582 case UDIV:
3583 case MOD:
3584 case UMOD:
3585 *total = COSTS_N_INSNS (20);
3586 return true;
3588 case FLOAT:
3589 case FIX:
3590 *total = 100;
3591 return true;
3593 default:
3594 return false;
3598 /* Determine the size of the fundamental move insn that will be used
3599 for the specified mode. */
3600 static inline int
3601 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3603 const int mode_sz = GET_MODE_SIZE (mode);
3605 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3606 || (TARGET_FMOVD && mode == DFmode))
3607 return mode_sz;
3608 else
3610 /* The max. available mode for actual move insns is SImode.
3611 Larger accesses will be split into multiple loads/stores. */
3612 const int max_mov_sz = GET_MODE_SIZE (SImode);
3613 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3617 /* Determine the maximum possible displacement for a move insn for the
3618 specified mode. */
3620 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3622 /* The 4 byte displacement move insns are the same as the 2 byte
3623 versions but take a 12 bit displacement. All we need to do is to
3624 scale the max. displacement value accordingly. */
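/* For example, for SImode the basic mov.l @(disp,Rn) insn reaches
   15 * 4 = 60 bytes, and the scale below (4095 / 15 = 273) raises
   that to 60 * 273 = 16380 bytes for the SH2A 12 bit forms.  */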
3625 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3627 /* SH2A supports FPU move insns with 12 bit displacements.
3628 Other variants do not support any kind of displacements for
3629 FPU move insns. */
3630 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3631 return 0;
3632 else
3634 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3635 const int mode_sz = GET_MODE_SIZE (mode);
3636 int r = 15 * mov_insn_sz * disp_scale;
3638 /* If the mov insn will be split into multiple loads/stores, the
3639 maximum possible displacement is a bit smaller. */
3640 if (mode_sz > mov_insn_sz)
3641 r -= mode_sz - mov_insn_sz;
3642 return r;
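/* Worked example (added for illustration; not in the original sources):
   for SImode without the SH2A scaling, mov_insn_size is 4, so the maximum
   displacement is 15 * 4 = 60 bytes; with the SH2A 12 bit displacement
   forms the scale factor 4095 / 15 = 273 applies, giving 15 * 4 * 273
   = 16380 bytes.  For QImode the corresponding limits are 15 and 4095.  */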
3646 /* Determine the alignment mask for a move insn of the
3647 specified mode. */
3648 static inline int
3649 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3651 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3652 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3655 /* Return the displacement value of a displacement address. */
3656 HOST_WIDE_INT
3657 sh_disp_addr_displacement (rtx x)
3659 gcc_assert (satisfies_constraint_Sdd (x));
3660 return INTVAL (XEXP (XEXP (x, 0), 1));
3663 /* Compute the cost of an address. */
3664 static int
3665 sh_address_cost (rtx x, enum machine_mode mode,
3666 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3668 /* 'GBR + 0'. Account one more because of R0 restriction. */
3669 if (REG_P (x) && REGNO (x) == GBR_REG)
3670 return 2;
3672 /* Simple reg, post-inc, pre-dec addressing. */
3673 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3674 return 1;
3676 /* 'reg + disp' addressing. */
3677 if (GET_CODE (x) == PLUS
3678 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3680 /* 'GBR + disp'. Account one more because of R0 restriction. */
3681 if (REGNO (XEXP (x, 0)) == GBR_REG
3682 && gbr_displacement (XEXP (x, 1), mode))
3683 return 2;
3685 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3687 if (offset == 0)
3688 return 1;
3690 /* The displacement would fit into a 2 byte move insn.
3691 HImode and QImode loads/stores with displacement put pressure on
3692 R0 which will most likely require another reg copy. Thus account
3693 a higher cost for that. */
3694 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3695 return (mode == HImode || mode == QImode) ? 2 : 1;
3697 /* The displacement would fit into a 4 byte move insn (SH2A). */
3698 if (TARGET_SH2A
3699 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3700 return 2;
3702 /* The displacement is probably out of range and will require extra
3703 calculations. */
3704 return 3;
3707 /* 'reg + reg' addressing. Account a slightly higher cost because of
3708 increased pressure on R0. */
3709 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3710 && ! TARGET_SHMEDIA)
3711 return 3;
3713 /* Not sure what it is - probably expensive. */
3714 return 10;
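/* Illustrative costs (added comment, not from the original sources):
   "@r1" and "@r1+" cost 1; "@(8,r1)" costs 1 for SImode but 2 for QImode
   and HImode because of the R0 restriction; "@(r0,r1)" costs 3; GBR based
   addresses cost 2.  */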
3717 /* Code to expand a shift. */
3718 static void
3719 gen_ashift (int type, int n, rtx reg)
3721 rtx n_rtx;
3723 /* Negative values here come from the shift_amounts array. */
3724 if (n < 0)
3726 if (type == ASHIFT)
3727 type = LSHIFTRT;
3728 else
3729 type = ASHIFT;
3730 n = -n;
3733 n_rtx = GEN_INT (n);
3734 gcc_assert (satisfies_constraint_P27 (n_rtx));
3736 switch (type)
3738 case ASHIFTRT:
3739 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3740 break;
3741 case LSHIFTRT:
3742 if (n == 1)
3743 emit_insn (gen_shlr (reg, reg));
3744 else
3745 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3746 break;
3747 case ASHIFT:
3748 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3749 break;
3750 default:
3751 gcc_unreachable ();
3755 /* Code to expand a HImode shift. */
3756 static void
3757 gen_ashift_hi (int type, int n, rtx reg)
3759 /* Negative values here come from the shift_amounts array. */
3760 if (n < 0)
3762 if (type == ASHIFT)
3763 type = LSHIFTRT;
3764 else
3765 type = ASHIFT;
3766 n = -n;
3769 switch (type)
3771 case ASHIFTRT:
3772 case LSHIFTRT:
3773 /* We don't have HImode right shift operations because using the
3774 ordinary 32 bit shift instructions for that doesn't generate proper
3775 zero/sign extension.
3776 gen_ashift_hi is only called in contexts where we know that the
3777 sign extension works out correctly. */
3779 int offset = 0;
3780 if (GET_CODE (reg) == SUBREG)
3782 offset = SUBREG_BYTE (reg);
3783 reg = SUBREG_REG (reg);
3785 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3786 break;
3788 case ASHIFT:
3789 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3790 break;
3794 /* Output RTL to split a constant shift into its component SH constant
3795 shift instructions. */
3796 void
3797 gen_shifty_op (int code, rtx *operands)
3799 int value = INTVAL (operands[2]);
3800 int max, i;
3802 /* Truncate the shift count in case it is out of bounds. */
3803 value = value & 31;
3805 if (value == 31)
3807 if (code == LSHIFTRT)
3809 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3810 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3811 return;
3813 else if (code == ASHIFT)
3815 /* There is a two instruction sequence for 31 bit left shifts,
3816 but it requires r0. */
3817 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3819 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3820 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3821 return;
3825 else if (value == 0)
3827 /* This can happen even when optimizing, if there were subregs before
3828 reload. Don't output a nop here, as this is never optimized away;
3829 use a no-op move instead. */
3830 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3831 return;
3834 max = ashl_lshr_seq[value].insn_count;
3835 for (i = 0; i < max; i++)
3836 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
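/* Example (added for illustration, not part of the original sources): the
   SH1/SH2 only have constant shifts by 1, 2, 8 and 16, so a left shift by
   10 is likely emitted from the sequence table as shll8 followed by shll2,
   with gen_ashift called once per component shift.  */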
3839 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3840 don't matter. */
3841 void
3842 gen_shifty_hi_op (int code, rtx *operands)
3844 int value = INTVAL (operands[2]);
3845 int max, i;
3846 void (*gen_fun) (int, int, rtx);
3848 /* This operation is used by and_shl for SImode values with a few
3849 high bits known to be cleared. */
3850 value &= 31;
3851 if (value == 0)
3853 emit_insn (gen_nop ());
3854 return;
3857 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3858 if (code == ASHIFT)
3860 max = ext_ashl_lshr_seq[value].insn_count;
3861 for (i = 0; i < max; i++)
3862 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3864 else
3865 /* When shifting right, emit the shifts in reverse order, so that
3866 solitary negative values come first. */
3867 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3868 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3871 /* Output RTL for an arithmetic right shift.
3872 ??? Rewrite to use super-optimizer sequences. */
3873 bool
3874 expand_ashiftrt (rtx *operands)
3876 rtx wrk;
3877 char func[18];
3878 int value;
3880 if (TARGET_DYNSHIFT)
3882 if (!CONST_INT_P (operands[2]))
3884 rtx count = copy_to_mode_reg (SImode, operands[2]);
3885 emit_insn (gen_negsi2 (count, count));
3886 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3887 return true;
3889 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3890 > 1 + SH_DYNAMIC_SHIFT_COST)
3892 rtx count
3893 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3894 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3895 return true;
3898 if (!CONST_INT_P (operands[2]))
3899 return false;
3901 value = INTVAL (operands[2]) & 31;
3903 if (value == 31)
3905 /* If we are called from abs expansion, arrange things so that we
3906 can use a single MT instruction that doesn't clobber the source,
3907 if LICM can hoist out the load of the constant zero. */
3908 if (currently_expanding_to_rtl)
3910 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3911 operands[1]));
3912 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3913 return true;
3915 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3916 return true;
3918 else if (value >= 16 && value <= 19)
3920 wrk = gen_reg_rtx (SImode);
3921 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3922 value -= 16;
3923 while (value--)
3924 gen_ashift (ASHIFTRT, 1, wrk);
3925 emit_move_insn (operands[0], wrk);
3926 return true;
3928 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3929 else if (value <= 5)
3931 wrk = gen_reg_rtx (SImode);
3932 emit_move_insn (wrk, operands[1]);
3933 while (value--)
3934 gen_ashift (ASHIFTRT, 1, wrk);
3935 emit_move_insn (operands[0], wrk);
3936 return true;
3939 wrk = gen_reg_rtx (Pmode);
3941 /* Load the value into an arg reg and call a helper. */
3942 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3943 sprintf (func, "__ashiftrt_r4_%d", value);
3944 function_symbol (wrk, func, SFUNC_STATIC);
3945 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3946 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3947 return true;
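/* Added note (not in the original sources): for shift counts that are too
   long to expand inline, the sprintf above produces helper names such as
   "__ashiftrt_r4_12"; as the surrounding moves show, the library routine
   takes its operand in r4 and returns the shifted result in r4 as well.  */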
3950 /* Try to find a good way to implement the combiner pattern
3951 [(set (match_operand:SI 0 "register_operand" "r")
3952 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3953 (match_operand:SI 2 "const_int_operand" "n"))
3954 (match_operand:SI 3 "const_int_operand" "n"))) .
3955 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3956 return 0 for simple right / left or left/right shift combination.
3957 return 1 for a combination of shifts with zero_extend.
3958 return 2 for a combination of shifts with an AND that needs r0.
3959 return 3 for a combination of shifts with an AND that needs an extra
3960 scratch register, when the three highmost bits of the AND mask are clear.
3961 return 4 for a combination of shifts with an AND that needs an extra
3962 scratch register, when any of the three highmost bits of the AND mask
3963 is set.
3964 If ATTRP is set, store an initial right shift width in ATTRP[0],
3965 and the instruction length in ATTRP[1]. These values are not valid
3966 when returning 0.
3967 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3968 shift_amounts for the last shift value that is to be used before the
3969 sign extend. */
3971 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3973 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3974 int left = INTVAL (left_rtx), right;
3975 int best = 0;
3976 int cost, best_cost = 10000;
3977 int best_right = 0, best_len = 0;
3978 int i;
3979 int can_ext;
3981 if (left < 0 || left > 31)
3982 return 0;
3983 if (CONST_INT_P (mask_rtx))
3984 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3985 else
3986 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3987 /* Can this be expressed as a right shift / left shift pair? */
3988 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3989 right = exact_log2 (lsb);
3990 mask2 = ~(mask + lsb - 1);
3991 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3992 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
3993 if (! mask2)
3994 best_cost = ashl_lshr_seq[right].insn_count
3995 + ashl_lshr_seq[right + left].insn_count;
3996 /* mask has no trailing zeroes <==> ! right */
3997 else if (! right && mask2 == ~(lsb2 - 1))
3999 int late_right = exact_log2 (lsb2);
4000 best_cost = ashl_lshr_seq[left + late_right].insn_count
4001 + ashl_lshr_seq[late_right].insn_count;
4003 /* Try to use zero extend. */
4004 if (mask2 == ~(lsb2 - 1))
4006 int width, first;
4008 for (width = 8; width <= 16; width += 8)
4010 /* Can we zero-extend right away? */
4011 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4013 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4014 + ext_ashl_lshr_seq[left + right].insn_count;
4015 if (cost < best_cost)
4017 best = 1;
4018 best_cost = cost;
4019 best_right = right;
4020 best_len = cost;
4021 if (attrp)
4022 attrp[2] = -1;
4024 continue;
4026 /* ??? Could try to put zero extend into initial right shift,
4027 or even shift a bit left before the right shift. */
4028 /* Determine value of first part of left shift, to get to the
4029 zero extend cut-off point. */
4030 first = width - exact_log2 (lsb2) + right;
4031 if (first >= 0 && right + left - first >= 0)
4033 cost = ext_ashl_lshr_seq[right].insn_count
4034 + ext_ashl_lshr_seq[first].insn_count + 1
4035 + ext_ashl_lshr_seq[right + left - first].insn_count;
4037 if (cost < best_cost)
4039 best = 1;
4040 best_cost = cost;
4041 best_right = right;
4042 best_len = cost;
4043 if (attrp)
4044 attrp[2] = first;
4049 /* Try to use r0 AND pattern */
4050 for (i = 0; i <= 2; i++)
4052 if (i > right)
4053 break;
4054 if (! CONST_OK_FOR_K08 (mask >> i))
4055 continue;
4056 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4057 if (cost < best_cost)
4059 best = 2;
4060 best_cost = cost;
4061 best_right = i;
4062 best_len = cost - 1;
4065 /* Try to use a scratch register to hold the AND operand. */
4066 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4067 for (i = 0; i <= 2; i++)
4069 if (i > right)
4070 break;
4071 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4072 + (can_ext
4073 ? ext_ashl_lshr_seq
4074 : ashl_lshr_seq)[left + i].insn_count;
4075 if (cost < best_cost)
4077 best = 4 - can_ext;
4078 best_cost = cost;
4079 best_right = i;
4080 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4084 if (attrp)
4086 attrp[0] = best_right;
4087 attrp[1] = best_len;
4089 return best;
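/* Rough illustration (added comment, not from the original sources): with
   LEFT_RTX = 0 and MASK_RTX = 0xff the operation degenerates to a plain
   zero extension, so the zero-extend alternative (return value 1) should
   win; a wide high mask such as 0xff000000 fits neither extu.b/extu.w nor
   the 8 bit AND immediate, so one of the scratch-register AND alternatives
   (3 or 4) is chosen instead.  */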
4092 /* This is used in length attributes of the unnamed instructions
4093 corresponding to shl_and_kind return values of 1 and 2. */
4095 shl_and_length (rtx insn)
4097 rtx set_src, left_rtx, mask_rtx;
4098 int attributes[3];
4100 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4101 left_rtx = XEXP (XEXP (set_src, 0), 1);
4102 mask_rtx = XEXP (set_src, 1);
4103 shl_and_kind (left_rtx, mask_rtx, attributes);
4104 return attributes[1];
4107 /* This is used in length attribute of the and_shl_scratch instruction. */
4109 shl_and_scr_length (rtx insn)
4111 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4112 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4113 rtx op = XEXP (set_src, 0);
4114 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4115 op = XEXP (XEXP (op, 0), 0);
4116 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4119 /* Generate rtl for instructions for which shl_and_kind advised a particular
4120 method of generating them, i.e. returned nonzero. */
4121 bool
4122 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4124 int attributes[3];
4125 unsigned HOST_WIDE_INT mask;
4126 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4127 int right, total_shift;
4128 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4130 right = attributes[0];
4131 total_shift = INTVAL (left_rtx) + right;
4132 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4133 switch (kind)
4135 default:
4136 return true;
4137 case 1:
4139 int first = attributes[2];
4140 rtx operands[3];
4142 if (first < 0)
4144 emit_insn ((mask << right) <= 0xff
4145 ? gen_zero_extendqisi2 (dest,
4146 gen_lowpart (QImode, source))
4147 : gen_zero_extendhisi2 (dest,
4148 gen_lowpart (HImode, source)));
4149 source = dest;
4151 if (source != dest)
4152 emit_insn (gen_movsi (dest, source));
4153 operands[0] = dest;
4154 if (right)
4156 operands[2] = GEN_INT (right);
4157 gen_shifty_hi_op (LSHIFTRT, operands);
4159 if (first > 0)
4161 operands[2] = GEN_INT (first);
4162 gen_shifty_hi_op (ASHIFT, operands);
4163 total_shift -= first;
4164 mask <<= first;
4166 if (first >= 0)
4167 emit_insn (mask <= 0xff
4168 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4169 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4170 if (total_shift > 0)
4172 operands[2] = GEN_INT (total_shift);
4173 gen_shifty_hi_op (ASHIFT, operands);
4175 break;
4177 case 4:
4178 shift_gen_fun = gen_shifty_op;
4179 case 3:
4180 /* If the topmost bit that matters is set, set the topmost bits
4181 that don't matter. This way, we might be able to get a shorter
4182 signed constant. */
4183 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4184 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4185 case 2:
4186 /* Don't expand fine-grained when combining, because that will
4187 make the pattern fail. */
4188 if (currently_expanding_to_rtl
4189 || reload_in_progress || reload_completed)
4191 rtx operands[3];
4193 /* Cases 3 and 4 should be handled by this split
4194 only while combining */
4195 gcc_assert (kind <= 2);
4196 if (right)
4198 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4199 source = dest;
4201 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4202 if (total_shift)
4204 operands[0] = dest;
4205 operands[1] = dest;
4206 operands[2] = GEN_INT (total_shift);
4207 shift_gen_fun (ASHIFT, operands);
4209 break;
4211 else
4213 int neg = 0;
4214 if (kind != 4 && total_shift < 16)
4216 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4217 if (neg > 0)
4218 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4219 else
4220 neg = 0;
4222 emit_insn (gen_and_shl_scratch (dest, source,
4223 GEN_INT (right),
4224 GEN_INT (mask),
4225 GEN_INT (total_shift + neg),
4226 GEN_INT (neg)));
4227 emit_insn (gen_movsi (dest, dest));
4228 break;
4231 return false;
4234 /* Try to find a good way to implement the combiner pattern
4235 [(set (match_operand:SI 0 "register_operand" "=r")
4236 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4237 (match_operand:SI 2 "const_int_operand" "n")
4238 (match_operand:SI 3 "const_int_operand" "n")
4239 (const_int 0)))
4240 (clobber (reg:SI T_REG))]
4241 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4242 return 0 for simple left / right shift combination.
4243 return 1 for left shift / 8 bit sign extend / left shift.
4244 return 2 for left shift / 16 bit sign extend / left shift.
4245 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4246 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4247 return 5 for left shift / 16 bit sign extend / right shift
4248 return 6 for < 8 bit sign extend / left shift.
4249 return 7 for < 8 bit sign extend / left shift / single right shift.
4250 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4252 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4254 int left, size, insize, ext;
4255 int cost = 0, best_cost;
4256 int kind;
4258 left = INTVAL (left_rtx);
4259 size = INTVAL (size_rtx);
4260 insize = size - left;
4261 gcc_assert (insize > 0);
4262 /* Default to left / right shift. */
4263 kind = 0;
4264 best_cost = ashl_lshr_seq[32 - insize].insn_count
4265 + ashl_lshr_seq[32 - size].insn_count;
4266 if (size <= 16)
4268 /* 16 bit shift / sign extend / 16 bit shift */
4269 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4270 + ashl_lshr_seq[16 - size].insn_count;
4271 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4272 below, by alternative 3 or something even better. */
4273 if (cost < best_cost)
4275 kind = 5;
4276 best_cost = cost;
4279 /* Try a plain sign extend between two shifts. */
4280 for (ext = 16; ext >= insize; ext -= 8)
4282 if (ext <= size)
4284 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4285 + ashl_lshr_seq[size - ext].insn_count;
4286 if (cost < best_cost)
4288 kind = ext / (unsigned) 8;
4289 best_cost = cost;
4292 /* Check if we can do a sloppy shift with a final signed shift
4293 restoring the sign. */
4294 if (EXT_SHIFT_SIGNED (size - ext))
4295 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4296 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4297 /* If not, maybe it's still cheaper to do the second shift sloppy,
4298 and do a final sign extend? */
4299 else if (size <= 16)
4300 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4301 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4302 + 1;
4303 else
4304 continue;
4305 if (cost < best_cost)
4307 kind = ext / (unsigned) 8 + 2;
4308 best_cost = cost;
4311 /* Check if we can sign extend in r0 */
4312 if (insize < 8)
4314 cost = 3 + ashl_lshr_seq[left].insn_count;
4315 if (cost < best_cost)
4317 kind = 6;
4318 best_cost = cost;
4320 /* Try the same with a final signed shift. */
4321 if (left < 31)
4323 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4324 if (cost < best_cost)
4326 kind = 7;
4327 best_cost = cost;
4331 if (TARGET_DYNSHIFT)
4333 /* Try to use a dynamic shift. */
4334 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4335 if (cost < best_cost)
4337 kind = 0;
4338 best_cost = cost;
4341 if (costp)
4342 *costp = cost;
4343 return kind;
4346 /* Function to be used in the length attribute of the instructions
4347 implementing this pattern. */
4349 shl_sext_length (rtx insn)
4351 rtx set_src, left_rtx, size_rtx;
4352 int cost;
4354 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4355 left_rtx = XEXP (XEXP (set_src, 0), 1);
4356 size_rtx = XEXP (set_src, 1);
4357 shl_sext_kind (left_rtx, size_rtx, &cost);
4358 return cost;
4361 /* Generate rtl for this pattern */
4362 bool
4363 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4365 int kind;
4366 int left, size, insize, cost;
4367 rtx operands[3];
4369 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4370 left = INTVAL (left_rtx);
4371 size = INTVAL (size_rtx);
4372 insize = size - left;
4373 switch (kind)
4375 case 1:
4376 case 2:
4377 case 3:
4378 case 4:
4380 int ext = kind & 1 ? 8 : 16;
4381 int shift2 = size - ext;
4383 /* Don't expand fine-grained when combining, because that will
4384 make the pattern fail. */
4385 if (! currently_expanding_to_rtl
4386 && ! reload_in_progress && ! reload_completed)
4388 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4389 emit_insn (gen_movsi (dest, source));
4390 break;
4392 if (dest != source)
4393 emit_insn (gen_movsi (dest, source));
4394 operands[0] = dest;
4395 if (ext - insize)
4397 operands[2] = GEN_INT (ext - insize);
4398 gen_shifty_hi_op (ASHIFT, operands);
4400 emit_insn (kind & 1
4401 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4402 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4403 if (kind <= 2)
4405 if (shift2)
4407 operands[2] = GEN_INT (shift2);
4408 gen_shifty_op (ASHIFT, operands);
4411 else
4413 if (shift2 > 0)
4415 if (EXT_SHIFT_SIGNED (shift2))
4417 operands[2] = GEN_INT (shift2 + 1);
4418 gen_shifty_op (ASHIFT, operands);
4419 operands[2] = const1_rtx;
4420 gen_shifty_op (ASHIFTRT, operands);
4421 break;
4423 operands[2] = GEN_INT (shift2);
4424 gen_shifty_hi_op (ASHIFT, operands);
4426 else if (shift2)
4428 operands[2] = GEN_INT (-shift2);
4429 gen_shifty_hi_op (LSHIFTRT, operands);
4431 emit_insn (size <= 8
4432 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4433 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4435 break;
4437 case 5:
4439 int i = 16 - size;
4440 if (! currently_expanding_to_rtl
4441 && ! reload_in_progress && ! reload_completed)
4442 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4443 else
4445 operands[0] = dest;
4446 operands[2] = GEN_INT (16 - insize);
4447 gen_shifty_hi_op (ASHIFT, operands);
4448 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4450 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4451 while (--i >= 0)
4452 gen_ashift (ASHIFTRT, 1, dest);
4453 break;
4455 case 6:
4456 case 7:
4457 /* Don't expand fine-grained when combining, because that will
4458 make the pattern fail. */
4459 if (! currently_expanding_to_rtl
4460 && ! reload_in_progress && ! reload_completed)
4462 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4463 emit_insn (gen_movsi (dest, source));
4464 break;
4466 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4467 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4468 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4469 operands[0] = dest;
4470 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4471 gen_shifty_op (ASHIFT, operands);
4472 if (kind == 7)
4473 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4474 break;
4475 default:
4476 return true;
4478 return false;
4481 /* Prefix a symbol_ref name with "datalabel". */
4483 gen_datalabel_ref (rtx sym)
4485 const char *str;
4487 if (GET_CODE (sym) == LABEL_REF)
4488 return gen_rtx_CONST (GET_MODE (sym),
4489 gen_rtx_UNSPEC (GET_MODE (sym),
4490 gen_rtvec (1, sym),
4491 UNSPEC_DATALABEL));
4493 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4495 str = XSTR (sym, 0);
4496 /* Share all SYMBOL_REF strings with the same value - that is important
4497 for cse. */
4498 str = IDENTIFIER_POINTER (get_identifier (str));
4499 XSTR (sym, 0) = str;
4501 return sym;
4505 static alloc_pool label_ref_list_pool;
4507 typedef struct label_ref_list_d
4509 rtx label;
4510 struct label_ref_list_d *next;
4511 } *label_ref_list_t;
4513 /* The SH cannot load a large constant into a register, constants have to
4514 come from a pc relative load. The reference of a pc relative load
4515 instruction must be less than 1k in front of the instruction. This
4516 means that we often have to dump a constant inside a function, and
4517 generate code to branch around it.
4519 It is important to minimize this, since the branches will slow things
4520 down and make things bigger.
4522 Worst case code looks like:
4524 mov.l L1,rn
4525 bra L2
4526 nop
4527 align
4528 L1: .long value
4529 L2:
4530 ..
4532 mov.l L3,rn
4533 bra L4
4534 nop
4535 align
4536 L3: .long value
4537 L4:
4538 ..
4540 We fix this by performing a scan before scheduling, which notices which
4541 instructions need to have their operands fetched from the constant table
4542 and builds the table.
4544 The algorithm is:
4546 scan, find an instruction which needs a pcrel move. Look forward, find the
4547 last barrier which is within MAX_COUNT bytes of the requirement.
4548 If there isn't one, make one. Process all the instructions between
4549 the find and the barrier.
4551 In the above example, we can tell that L3 is within 1k of L1, so
4552 the first move can be shrunk from the 3 insn+constant sequence into
4553 just 1 insn, and the constant moved to L3 to make:
4555 mov.l L1,rn
4556 ..
4557 mov.l L3,rn
4558 bra L4
4559 nop
4560 align
4561 L3:.long value
4562 L4:.long value
4564 Then the second move becomes the target for the shortening process. */
4566 typedef struct
4568 rtx value; /* Value in table. */
4569 rtx label; /* Label of value. */
4570 label_ref_list_t wend; /* End of window. */
4571 enum machine_mode mode; /* Mode of value. */
4573 /* True if this constant is accessed as part of a post-increment
4574 sequence. Note that HImode constants are never accessed in this way. */
4575 bool part_of_sequence_p;
4576 } pool_node;
4578 /* The maximum number of constants that can fit into one pool, since
4579 constants in the range 0..510 are at least 2 bytes long, and in the
4580 range from there to 1018 at least 4 bytes. */
4582 #define MAX_POOL_SIZE 372
4583 static pool_node pool_vector[MAX_POOL_SIZE];
4584 static int pool_size;
4585 static rtx pool_window_label;
4586 static int pool_window_last;
4588 static int max_labelno_before_reorg;
4590 /* ??? If we need a constant in HImode which is the truncated value of a
4591 constant we need in SImode, we could combine the two entries thus saving
4592 two bytes. Is this common enough to be worth the effort of implementing
4593 it? */
4595 /* ??? This stuff should be done at the same time that we shorten branches.
4596 As it is now, we must assume that all branches are the maximum size, and
4597 this causes us to almost always output constant pools sooner than
4598 necessary. */
4600 /* Add a constant to the pool and return its label. */
4601 static rtx
4602 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4604 int i;
4605 rtx lab, new_rtx;
4606 label_ref_list_t ref, newref;
4608 /* First see if we've already got it. */
4609 for (i = 0; i < pool_size; i++)
4611 if (x->code == pool_vector[i].value->code
4612 && mode == pool_vector[i].mode)
4614 if (x->code == CODE_LABEL)
4616 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4617 continue;
4619 if (rtx_equal_p (x, pool_vector[i].value))
4621 lab = new_rtx = 0;
4622 if (! last_value
4623 || ! i
4624 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4626 new_rtx = gen_label_rtx ();
4627 LABEL_REFS (new_rtx) = pool_vector[i].label;
4628 pool_vector[i].label = lab = new_rtx;
4630 if (lab && pool_window_label)
4632 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4633 newref->label = pool_window_label;
4634 ref = pool_vector[pool_window_last].wend;
4635 newref->next = ref;
4636 pool_vector[pool_window_last].wend = newref;
4638 if (new_rtx)
4639 pool_window_label = new_rtx;
4640 pool_window_last = i;
4641 return lab;
4646 /* Need a new one. */
4647 pool_vector[pool_size].value = x;
4648 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4650 lab = 0;
4651 pool_vector[pool_size - 1].part_of_sequence_p = true;
4653 else
4654 lab = gen_label_rtx ();
4655 pool_vector[pool_size].mode = mode;
4656 pool_vector[pool_size].label = lab;
4657 pool_vector[pool_size].wend = NULL;
4658 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4659 if (lab && pool_window_label)
4661 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4662 newref->label = pool_window_label;
4663 ref = pool_vector[pool_window_last].wend;
4664 newref->next = ref;
4665 pool_vector[pool_window_last].wend = newref;
4667 if (lab)
4668 pool_window_label = lab;
4669 pool_window_last = pool_size;
4670 pool_size++;
4671 return lab;
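/* Added note (not part of the original sources): repeated requests for the
   same value and mode share one pool_vector entry; a later use typically
   just chains another label onto the entry via LABEL_REFS, so the constant
   is emitted once but can be referenced from several places.  */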
4674 /* Output the literal table. START, if nonzero, is the first instruction
4675 this table is needed for, and also indicates that there is at least one
4676 casesi_worker_2 instruction; We have to emit the operand3 labels from
4677 these insns at a 4-byte aligned position. BARRIER is the barrier
4678 after which we are to place the table. */
4679 static void
4680 dump_table (rtx start, rtx barrier)
4682 rtx scan = barrier;
4683 int i;
4684 bool need_align = true;
4685 rtx lab;
4686 label_ref_list_t ref;
4687 bool have_df = false;
4689 /* Do two passes, first time dump out the HI sized constants. */
4691 for (i = 0; i < pool_size; i++)
4693 pool_node *p = &pool_vector[i];
4695 if (p->mode == HImode)
4697 if (need_align)
4699 scan = emit_insn_after (gen_align_2 (), scan);
4700 need_align = false;
4702 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4703 scan = emit_label_after (lab, scan);
4704 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4705 scan);
4706 for (ref = p->wend; ref; ref = ref->next)
4708 lab = ref->label;
4709 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4712 else if (p->mode == DFmode)
4713 have_df = true;
4716 need_align = true;
4718 if (start)
4720 scan = emit_insn_after (gen_align_4 (), scan);
4721 need_align = false;
4722 for (; start != barrier; start = NEXT_INSN (start))
4723 if (NONJUMP_INSN_P (start)
4724 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4726 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4727 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4729 scan = emit_label_after (lab, scan);
4732 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4734 rtx align_insn = NULL_RTX;
4736 scan = emit_label_after (gen_label_rtx (), scan);
4737 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4738 need_align = false;
4740 for (i = 0; i < pool_size; i++)
4742 pool_node *p = &pool_vector[i];
4744 switch (p->mode)
4746 case HImode:
4747 break;
4748 case SImode:
4749 case SFmode:
4750 if (align_insn && !p->part_of_sequence_p)
4752 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4753 emit_label_before (lab, align_insn);
4754 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4755 align_insn);
4756 for (ref = p->wend; ref; ref = ref->next)
4758 lab = ref->label;
4759 emit_insn_before (gen_consttable_window_end (lab),
4760 align_insn);
4762 delete_insn (align_insn);
4763 align_insn = NULL_RTX;
4764 continue;
4766 else
4768 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4769 scan = emit_label_after (lab, scan);
4770 scan = emit_insn_after (gen_consttable_4 (p->value,
4771 const0_rtx), scan);
4772 need_align = ! need_align;
4774 break;
4775 case DFmode:
4776 if (need_align)
4778 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4779 align_insn = scan;
4780 need_align = false;
4782 case DImode:
4783 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4784 scan = emit_label_after (lab, scan);
4785 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4786 scan);
4787 break;
4788 default:
4789 gcc_unreachable ();
4792 if (p->mode != HImode)
4794 for (ref = p->wend; ref; ref = ref->next)
4796 lab = ref->label;
4797 scan = emit_insn_after (gen_consttable_window_end (lab),
4798 scan);
4803 pool_size = 0;
4806 for (i = 0; i < pool_size; i++)
4808 pool_node *p = &pool_vector[i];
4810 switch (p->mode)
4812 case HImode:
4813 break;
4814 case SImode:
4815 case SFmode:
4816 if (need_align)
4818 need_align = false;
4819 scan = emit_label_after (gen_label_rtx (), scan);
4820 scan = emit_insn_after (gen_align_4 (), scan);
4822 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4823 scan = emit_label_after (lab, scan);
4824 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4825 scan);
4826 break;
4827 case DFmode:
4828 case DImode:
4829 if (need_align)
4831 need_align = false;
4832 scan = emit_label_after (gen_label_rtx (), scan);
4833 scan = emit_insn_after (gen_align_4 (), scan);
4835 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4836 scan = emit_label_after (lab, scan);
4837 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4838 scan);
4839 break;
4840 default:
4841 gcc_unreachable ();
4844 if (p->mode != HImode)
4846 for (ref = p->wend; ref; ref = ref->next)
4848 lab = ref->label;
4849 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4854 scan = emit_insn_after (gen_consttable_end (), scan);
4855 scan = emit_barrier_after (scan);
4856 pool_size = 0;
4857 pool_window_label = NULL_RTX;
4858 pool_window_last = 0;
4861 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4863 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4865 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4866 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4867 need to fix it if the input value is CONST_OK_FOR_I08. */
4868 static bool
4869 broken_move (rtx insn)
4871 if (NONJUMP_INSN_P (insn))
4873 rtx pat = PATTERN (insn);
4874 if (GET_CODE (pat) == PARALLEL)
4875 pat = XVECEXP (pat, 0, 0);
4876 if (GET_CODE (pat) == SET
4877 /* We can load any 8-bit value if we don't care what the high
4878 order bits end up as. */
4879 && GET_MODE (SET_DEST (pat)) != QImode
4880 && (CONSTANT_P (SET_SRC (pat))
4881 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4882 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4883 /* Match mova_const. */
4884 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4885 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4886 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4887 && ! (TARGET_SH2E
4888 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4889 && (fp_zero_operand (SET_SRC (pat))
4890 || fp_one_operand (SET_SRC (pat)))
4891 /* In general we don't know the current setting of fpscr, so
4892 disable fldi.
4893 There is an exception if this was a register-register move
4894 before reload - and hence it was ascertained that we have
4895 single precision setting - and in a post-reload optimization
4896 we changed this to do a constant load. In that case
4897 we don't have an r0 clobber, hence we must use fldi. */
4898 && (TARGET_FMOVD
4899 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4900 == SCRATCH))
4901 && REG_P (SET_DEST (pat))
4902 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4903 && ! (TARGET_SH2A
4904 && GET_MODE (SET_DEST (pat)) == SImode
4905 && (satisfies_constraint_I20 (SET_SRC (pat))
4906 || satisfies_constraint_I28 (SET_SRC (pat))))
4907 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4908 return true;
4911 return false;
4914 /* Return true if the specified insn is a mova insn. */
4915 static bool
4916 mova_p (rtx insn)
4918 return (NONJUMP_INSN_P (insn)
4919 && GET_CODE (PATTERN (insn)) == SET
4920 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4921 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4922 /* Don't match mova_const. */
4923 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4926 /* Fix up a mova from a switch that went out of range. */
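/* Added explanatory note (not from the original sources): for non-PIC code
   the out-of-range mova is simply turned into a constant load of the label
   address, which broken_move will later pick up and push into a constant
   pool.  For PIC code the paired casesi_worker_1 insn is rewritten into
   casesi_worker_2 and the mova is changed to load the label difference
   (UNSPEC_SYMOFF) instead, keeping the table position independent.  */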
4927 static void
4928 fixup_mova (rtx mova)
4930 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4931 if (! flag_pic)
4933 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4934 INSN_CODE (mova) = -1;
4936 else
4938 rtx worker = mova;
4939 rtx lab = gen_label_rtx ();
4940 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4944 worker = NEXT_INSN (worker);
4945 gcc_assert (worker
4946 && !LABEL_P (worker)
4947 && !JUMP_P (worker));
4948 } while (NOTE_P (worker)
4949 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4950 wpat = PATTERN (worker);
4951 wpat0 = XVECEXP (wpat, 0, 0);
4952 wpat1 = XVECEXP (wpat, 0, 1);
4953 wsrc = SET_SRC (wpat0);
4954 PATTERN (worker) = (gen_casesi_worker_2
4955 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4956 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4957 XEXP (wpat1, 0)));
4958 INSN_CODE (worker) = -1;
4959 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4960 base = gen_rtx_LABEL_REF (Pmode, lab);
4961 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4962 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4963 INSN_CODE (mova) = -1;
4967 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4968 *num_mova, and check if the new mova is not nested within the first one.
4969 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4970 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4971 static int
4972 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4974 int n_addr = 0; /* Initialization to shut up spurious warning. */
4975 int f_target, n_target = 0; /* Likewise. */
4977 if (optimize)
4979 /* If NEW_MOVA has no address yet, it will be handled later. */
4980 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4981 return -1;
4983 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4984 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4985 if (n_addr > n_target || n_addr + 1022 < n_target)
4987 /* Change the mova into a load.
4988 broken_move will then return true for it. */
4989 fixup_mova (new_mova);
4990 return 1;
4993 if (!(*num_mova)++)
4995 *first_mova = new_mova;
4996 return 2;
4998 if (!optimize
4999 || ((f_target
5000 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5001 >= n_target))
5002 return -1;
5004 (*num_mova)--;
5005 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5006 > n_target - n_addr)
5008 fixup_mova (*first_mova);
5009 return 0;
5011 else
5013 fixup_mova (new_mova);
5014 return 1;
5018 /* Find the last barrier from insn FROM which is close enough to hold the
5019 constant pool. If we can't find one, then create one near the end of
5020 the range. */
5021 static rtx
5022 find_barrier (int num_mova, rtx mova, rtx from)
5024 int count_si = 0;
5025 int count_hi = 0;
5026 int found_hi = 0;
5027 int found_si = 0;
5028 int found_di = 0;
5029 int hi_align = 2;
5030 int si_align = 2;
5031 int leading_mova = num_mova;
5032 rtx barrier_before_mova = NULL_RTX;
5033 rtx found_barrier = NULL_RTX;
5034 rtx good_barrier = NULL_RTX;
5035 int si_limit;
5036 int hi_limit;
5037 rtx orig = from;
5038 rtx last_got = NULL_RTX;
5039 rtx last_symoff = NULL_RTX;
5041 /* For HImode: range is 510, add 4 because pc counts from address of
5042 second instruction after this one, subtract 2 for the jump instruction
5043 that we may need to emit before the table, subtract 2 for the instruction
5044 that fills the jump delay slot (in very rare cases, reorg will take an
5045 instruction from after the constant pool or will leave the delay slot
5046 empty). This gives 510.
5047 For SImode: range is 1020, add 4 because pc counts from address of
5048 second instruction after this one, subtract 2 in case pc is 2 byte
5049 aligned, subtract 2 for the jump instruction that we may need to emit
5050 before the table, subtract 2 for the instruction that fills the jump
5051 delay slot. This gives 1018. */
5053 /* The branch will always be shortened now that the reference address for
5054 forward branches is the successor address, thus we no longer need to make
5055 adjustments to the [sh]i_limit for -O0. */
5057 si_limit = 1018;
5058 hi_limit = 510;
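/* Added background note (not in the original sources): these limits derive
   from the hardware encodings; mov.w @(disp,PC),Rn adds an 8 bit
   displacement scaled by 2 to PC + 4 (up to 510 bytes), while
   mov.l @(disp,PC),Rn adds an 8 bit displacement scaled by 4 to the
   longword-aligned PC + 4 (up to 1020 bytes), as accounted for above.  */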
5060 while (from && count_si < si_limit && count_hi < hi_limit)
5062 int inc = get_attr_length (from);
5063 int new_align = 1;
5065 /* If this is a label that existed at the time of the compute_alignments
5066 call, determine the alignment. N.B. When find_barrier recurses for
5067 an out-of-reach mova, we might see labels at the start of previously
5068 inserted constant tables. */
5069 if (LABEL_P (from)
5070 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5072 if (optimize)
5073 new_align = 1 << label_to_alignment (from);
5074 else if (BARRIER_P (prev_nonnote_insn (from)))
5075 new_align = 1 << barrier_align (from);
5076 else
5077 new_align = 1;
5078 inc = 0;
5080 /* In case we are scanning a constant table because of recursion, check
5081 for explicit alignments. If the table is long, we might be forced
5082 to emit the new table in front of it; the length of the alignment
5083 might be the last straw. */
5084 else if (NONJUMP_INSN_P (from)
5085 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5086 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5087 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5088 /* When we find the end of a constant table, paste the new constant
5089 at the end. That is better than putting it in front because
5090 this way, we don't need extra alignment for adding a 4-byte-aligned
5091 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5092 else if (NONJUMP_INSN_P (from)
5093 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5094 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5095 return from;
5097 if (BARRIER_P (from))
5099 rtx next;
5101 found_barrier = from;
5103 /* If we are at the end of the function, or in front of an alignment
5104 instruction, we need not insert an extra alignment. We prefer
5105 this kind of barrier. */
5106 if (barrier_align (from) > 2)
5107 good_barrier = from;
5109 /* If we are at the end of a hot/cold block, dump the constants
5110 here. */
5111 next = NEXT_INSN (from);
5112 if (next
5113 && NOTE_P (next)
5114 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5115 break;
5118 if (broken_move (from))
5120 rtx pat, src, dst;
5121 enum machine_mode mode;
5123 pat = PATTERN (from);
5124 if (GET_CODE (pat) == PARALLEL)
5125 pat = XVECEXP (pat, 0, 0);
5126 src = SET_SRC (pat);
5127 dst = SET_DEST (pat);
5128 mode = GET_MODE (dst);
5130 /* A GOT pc-relative setting comes in a pair of
5131 mova .L8,r0
5132 mov.l .L8,r12
5133 instructions. (plus add r0,r12).
5134 Remember if we see one without the other. */
5135 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5136 last_got = last_got ? NULL_RTX : from;
5137 else if (PIC_ADDR_P (src))
5138 last_got = last_got ? NULL_RTX : from;
5140 /* We must explicitly check the mode, because sometimes the
5141 front end will generate code to load unsigned constants into
5142 HImode targets without properly sign extending them. */
5143 if (mode == HImode
5144 || (mode == SImode && satisfies_constraint_I16 (src)
5145 && REGNO (dst) != FPUL_REG))
5147 found_hi += 2;
5148 /* We put the short constants before the long constants, so
5149 we must count the length of short constants in the range
5150 for the long constants. */
5151 /* ??? This isn't optimal, but is easy to do. */
5152 si_limit -= 2;
5154 else
5156 /* We dump DF/DI constants before SF/SI ones, because
5157 the limit is the same, but the alignment requirements
5158 are higher. We may waste up to 4 additional bytes
5159 for alignment, and the DF/DI constant may have
5160 another SF/SI constant placed before it. */
5161 if (TARGET_SHCOMPACT
5162 && ! found_di
5163 && (mode == DFmode || mode == DImode))
5165 found_di = 1;
5166 si_limit -= 8;
5168 while (si_align > 2 && found_si + si_align - 2 > count_si)
5169 si_align >>= 1;
5170 if (found_si > count_si)
5171 count_si = found_si;
5172 found_si += GET_MODE_SIZE (mode);
5173 if (num_mova)
5174 si_limit -= GET_MODE_SIZE (mode);
5178 if (mova_p (from))
5180 switch (untangle_mova (&num_mova, &mova, from))
5182 case 1:
5183 if (flag_pic)
5185 rtx src = SET_SRC (PATTERN (from));
5186 if (GET_CODE (src) == CONST
5187 && GET_CODE (XEXP (src, 0)) == UNSPEC
5188 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5189 last_symoff = from;
5191 break;
5192 case 0: return find_barrier (0, 0, mova);
5193 case 2:
5195 leading_mova = 0;
5196 barrier_before_mova
5197 = good_barrier ? good_barrier : found_barrier;
5199 default: break;
5201 if (found_si > count_si)
5202 count_si = found_si;
5204 else if (JUMP_TABLE_DATA_P (from)
5205 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5207 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5208 || (num_mova
5209 && (prev_nonnote_insn (from)
5210 == XEXP (MOVA_LABELREF (mova), 0))))
5211 num_mova--;
5212 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5214 /* We have just passed the barrier in front of the
5215 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5216 the ADDR_DIFF_VEC is accessed as data, just like our pool
5217 constants, this is a good opportunity to accommodate what
5218 we have gathered so far.
5219 If we waited any longer, we could end up at a barrier in
5220 front of code, which gives worse cache usage for separated
5221 instruction / data caches. */
5222 good_barrier = found_barrier;
5223 break;
5225 else
5227 rtx body = PATTERN (from);
5228 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5231 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5232 else if (JUMP_P (from)
5233 && ! TARGET_SH2
5234 && ! optimize_size)
5235 new_align = 4;
5237 /* There is a possibility that a bf is transformed into a bf/s by the
5238 delay slot scheduler. */
5239 if (JUMP_P (from)
5240 && get_attr_type (from) == TYPE_CBRANCH
5241 && ! sequence_insn_p (from))
5242 inc += 2;
5244 if (found_si)
5246 count_si += inc;
5247 if (new_align > si_align)
5249 si_limit -= (count_si - 1) & (new_align - si_align);
5250 si_align = new_align;
5252 count_si = (count_si + new_align - 1) & -new_align;
5254 if (found_hi)
5256 count_hi += inc;
5257 if (new_align > hi_align)
5259 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5260 hi_align = new_align;
5262 count_hi = (count_hi + new_align - 1) & -new_align;
5264 from = NEXT_INSN (from);
5267 if (num_mova)
5269 if (leading_mova)
5271 /* Try as we might, the leading mova is out of range. Change
5272 it into a load (which will become a pcload) and retry. */
5273 fixup_mova (mova);
5274 return find_barrier (0, 0, mova);
5276 else
5278 /* Insert the constant pool table before the mova instruction,
5279 to prevent the mova label reference from going out of range. */
5280 from = mova;
5281 good_barrier = found_barrier = barrier_before_mova;
5285 if (found_barrier)
5287 if (good_barrier && next_real_insn (found_barrier))
5288 found_barrier = good_barrier;
5290 else
5292 /* We didn't find a barrier in time to dump our stuff,
5293 so we'll make one. */
5294 rtx label = gen_label_rtx ();
5296 /* Don't emit a constant table in the middle of insns for
5297 casesi_worker_2. This is a bit overkill but is enough
5298 because casesi_worker_2 wouldn't appear so frequently. */
5299 if (last_symoff)
5300 from = last_symoff;
5302 /* If we exceeded the range, then we must back up over the last
5303 instruction we looked at. Otherwise, we just need to undo the
5304 NEXT_INSN at the end of the loop. */
5305 if (PREV_INSN (from) != orig
5306 && (count_hi > hi_limit || count_si > si_limit))
5307 from = PREV_INSN (PREV_INSN (from));
5308 else
5309 from = PREV_INSN (from);
5311 /* Don't emit a constant table in the middle of global pointer setting,
5312 since that would move the addressing base GOT into another table.
5313 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5314 in the pool anyway, so just move up the whole constant pool.
5316 However, avoid doing so when the last single GOT mov is the starting
5317 insn itself. Going back past the start insn would create a negative
5318 offset, causing errors. */
5319 if (last_got && last_got != orig)
5320 from = PREV_INSN (last_got);
5322 /* Don't insert the constant pool table at the position which
5323 may be the landing pad. */
5324 if (flag_exceptions
5325 && CALL_P (from)
5326 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5327 from = PREV_INSN (from);
5329 /* Walk back to be just before any jump or label.
5330 Putting it before a label reduces the number of times the branch
5331 around the constant pool table will be hit. Putting it before
5332 a jump makes it more likely that the bra delay slot will be
5333 filled. */
5334 while (NOTE_P (from) || JUMP_P (from)
5335 || LABEL_P (from))
5336 from = PREV_INSN (from);
5338 /* Make sure we do not split between a call and its corresponding
5339 CALL_ARG_LOCATION note. */
5340 if (CALL_P (from))
5342 rtx next = NEXT_INSN (from);
5343 if (next && NOTE_P (next)
5344 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5345 from = next;
5348 from = emit_jump_insn_after (gen_jump (label), from);
5349 JUMP_LABEL (from) = label;
5350 LABEL_NUSES (label) = 1;
5351 found_barrier = emit_barrier_after (from);
5352 emit_label_after (label, found_barrier);
5355 return found_barrier;
5358 /* If the instruction INSN is implemented by a special function, and we can
5359 positively find the register that is used to call the sfunc, and this
5360 register is not used anywhere else in this instruction - except as the
5361 destination of a set, return this register; else, return 0. */
5363 sfunc_uses_reg (rtx insn)
5365 int i;
5366 rtx pattern, part, reg_part, reg;
5368 if (!NONJUMP_INSN_P (insn))
5369 return NULL_RTX;
5370 pattern = PATTERN (insn);
5371 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5372 return NULL_RTX;
5374 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5376 part = XVECEXP (pattern, 0, i);
5377 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5378 reg_part = part;
5380 if (! reg_part)
5381 return NULL_RTX;
5382 reg = XEXP (reg_part, 0);
5383 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5385 part = XVECEXP (pattern, 0, i);
5386 if (part == reg_part || GET_CODE (part) == CLOBBER)
5387 continue;
5388 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5389 && REG_P (SET_DEST (part)))
5390 ? SET_SRC (part) : part)))
5391 return NULL_RTX;
5393 return reg;
5396 /* See if the only way in which INSN uses REG is by calling it, or by
5397 setting it while calling it. Set *SET to a SET rtx if the register
5398 is set by INSN. */
5399 static bool
5400 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5402 rtx pattern, reg2;
5404 *set = NULL_RTX;
5406 reg2 = sfunc_uses_reg (insn);
5407 if (reg2 && REGNO (reg2) == REGNO (reg))
5409 pattern = single_set (insn);
5410 if (pattern
5411 && REG_P (SET_DEST (pattern))
5412 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5413 *set = pattern;
5414 return false;
5416 if (!CALL_P (insn))
5418 /* We don't use rtx_equal_p because we don't care if the mode is
5419 different. */
5420 pattern = single_set (insn);
5421 if (pattern
5422 && REG_P (SET_DEST (pattern))
5423 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5425 rtx par, part;
5426 int i;
5428 *set = pattern;
5429 par = PATTERN (insn);
5430 if (GET_CODE (par) == PARALLEL)
5431 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5433 part = XVECEXP (par, 0, i);
5434 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5435 return true;
5437 return reg_mentioned_p (reg, SET_SRC (pattern));
5440 return true;
5443 pattern = PATTERN (insn);
5445 if (GET_CODE (pattern) == PARALLEL)
5447 int i;
5449 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5450 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5451 return true;
5452 pattern = XVECEXP (pattern, 0, 0);
5455 if (GET_CODE (pattern) == SET)
5457 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5459 /* We don't use rtx_equal_p, because we don't care if the
5460 mode is different. */
5461 if (!REG_P (SET_DEST (pattern))
5462 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5463 return true;
5465 *set = pattern;
5468 pattern = SET_SRC (pattern);
5471 if (GET_CODE (pattern) != CALL
5472 || !MEM_P (XEXP (pattern, 0))
5473 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5474 return true;
5476 return false;
5479 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5480 general registers. Bits 0..15 mean that the respective registers
5481 are used as inputs in the instruction. Bits 16..31 mean that the
5482 registers 0..15, respectively, are used as outputs, or are clobbered.
5483 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5485 regs_used (rtx x, int is_dest)
5487 enum rtx_code code;
5488 const char *fmt;
5489 int i, used = 0;
5491 if (! x)
5492 return used;
5493 code = GET_CODE (x);
5494 switch (code)
5496 case REG:
5497 if (REGNO (x) < 16)
5498 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5499 << (REGNO (x) + is_dest));
5500 return 0;
5501 case SUBREG:
5503 rtx y = SUBREG_REG (x);
5505 if (!REG_P (y))
5506 break;
5507 if (REGNO (y) < 16)
5508 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5509 << (REGNO (y) +
5510 subreg_regno_offset (REGNO (y),
5511 GET_MODE (y),
5512 SUBREG_BYTE (x),
5513 GET_MODE (x)) + is_dest));
5514 return 0;
5516 case SET:
5517 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5518 case RETURN:
5519 /* If there was a return value, it must have been indicated with USE. */
5520 return 0x00ffff00;
5521 case CLOBBER:
5522 is_dest = 1;
5523 break;
5524 case MEM:
5525 is_dest = 0;
5526 break;
5527 case CALL:
5528 used |= 0x00ff00f0;
5529 break;
5530 default:
5531 break;
5534 fmt = GET_RTX_FORMAT (code);
5536 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5538 if (fmt[i] == 'E')
5540 int j;
5541 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5542 used |= regs_used (XVECEXP (x, i, j), is_dest);
5544 else if (fmt[i] == 'e')
5545 used |= regs_used (XEXP (x, i), is_dest);
5547 return used;
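/* Worked example (added for clarity, not part of the original sources):
   for the pattern (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))),
   regs_used returns (1 << 17) | (1 << 2) | (1 << 3): r2 and r3 are inputs
   (bits 0..15) and r1 is written (bits 16..31).  */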
5550 /* Create an instruction that prevents redirection of a conditional branch
5551 to the destination of the JUMP with address ADDR.
5552 If the branch needs to be implemented as an indirect jump, try to find
5553 a scratch register for it.
5554 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5555 If any preceding insn that doesn't fit into a delay slot is good enough,
5556 pass 1. Pass 2 if a definite blocking insn is needed.
5557 -1 is used internally to avoid deep recursion.
5558 If a blocking instruction is made or recognized, return it. */
5559 static rtx
5560 gen_block_redirect (rtx jump, int addr, int need_block)
5562 int dead = 0;
5563 rtx prev = prev_nonnote_insn (jump);
5564 rtx dest;
5566 /* First, check if we already have an instruction that satisfies our need. */
5567 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5569 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5570 return prev;
5571 if (GET_CODE (PATTERN (prev)) == USE
5572 || GET_CODE (PATTERN (prev)) == CLOBBER
5573 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5574 prev = jump;
5575 else if ((need_block &= ~1) < 0)
5576 return prev;
5577 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5578 need_block = 0;
5580 if (GET_CODE (PATTERN (jump)) == RETURN)
5582 if (! need_block)
5583 return prev;
5584 /* Reorg even does nasty things with return insns that cause branches
5585 to go out of range - see find_end_label and callers. */
5586 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5588 /* We can't use JUMP_LABEL here because it might be undefined
5589 when not optimizing. */
5590 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5591 /* If the branch is out of range, try to find a scratch register for it. */
5592 if (optimize
5593 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5594 > 4092 + 4098))
5596 rtx scan;
5597 /* Don't look for the stack pointer as a scratch register,
5598 it would cause trouble if an interrupt occurred. */
5599 unsigned attempt = 0x7fff, used;
5600 int jump_left = flag_expensive_optimizations + 1;
5602 /* It is likely that the most recent eligible instruction is wanted for
5603 the delay slot. Therefore, find out which registers it uses, and
5604 try to avoid using them. */
5606 for (scan = jump; (scan = PREV_INSN (scan)); )
5608 enum rtx_code code;
5610 if (INSN_DELETED_P (scan))
5611 continue;
5612 code = GET_CODE (scan);
5613 if (code == CODE_LABEL || code == JUMP_INSN)
5614 break;
5615 if (code == INSN
5616 && GET_CODE (PATTERN (scan)) != USE
5617 && GET_CODE (PATTERN (scan)) != CLOBBER
5618 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5620 attempt &= ~regs_used (PATTERN (scan), 0);
5621 break;
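/* Now scan forward from the jump target, looking for a register that is
   set before it is used on that path; such a register is dead at the
   target and is a candidate scratch register.  */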
5624 for (used = dead = 0, scan = JUMP_LABEL (jump);
5625 (scan = NEXT_INSN (scan)); )
5627 enum rtx_code code;
5629 if (INSN_DELETED_P (scan))
5630 continue;
5631 code = GET_CODE (scan);
5632 if (INSN_P (scan))
5634 used |= regs_used (PATTERN (scan), 0);
5635 if (code == CALL_INSN)
5636 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5637 dead |= (used >> 16) & ~used;
5638 if (dead & attempt)
5640 dead &= attempt;
5641 break;
5643 if (code == JUMP_INSN)
5645 if (jump_left-- && simplejump_p (scan))
5646 scan = JUMP_LABEL (scan);
5647 else
5648 break;
5652 /* Mask out the stack pointer again, in case it was
5653 the only 'free' register we have found. */
5654 dead &= 0x7fff;
5656 /* If the immediate destination is still in range, check for possible
5657 threading with a jump beyond the delay slot insn.
5658 Don't check if we are called recursively; the jump has been or will be
5659 checked in a different invocation. */
5661 else if (optimize && need_block >= 0)
5663 rtx next = next_active_insn (next_active_insn (dest));
5664 if (next && JUMP_P (next)
5665 && GET_CODE (PATTERN (next)) == SET
5666 && recog_memoized (next) == CODE_FOR_jump_compact)
5668 dest = JUMP_LABEL (next);
5669 if (dest
5670 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5671 > 4092 + 4098))
5672 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5676 if (dead)
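/* DEAD & -DEAD isolates the lowest set bit, i.e. the lowest-numbered
   register known to be dead; use it as the scratch for the indirect jump.  */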
5678 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5680 /* It would be nice if we could convert the jump into an indirect
5681 jump / far branch right now, thus exposing all constituent
5682 instructions to further optimization. However, reorg uses
5683 simplejump_p to determine if there is an unconditional jump where
5684 it should try to schedule instructions from the target of the
5685 branch; simplejump_p fails for indirect jumps even if they have
5686 a JUMP_LABEL. */
5687 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5688 (reg, GEN_INT (unspec_bbr_uid++)),
5689 jump);
5690 /* ??? We would like this to have the scope of the jump, but that
5691 scope will change when a delay slot insn of an inner scope is added.
5692 Hence, after delay slot scheduling, we'll have to expect
5693 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5694 the jump. */
5696 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5697 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5698 return insn;
5700 else if (need_block)
5701 /* We can't use JUMP_LABEL here because it might be undefined
5702 when not optimizing. */
5703 return emit_insn_before (gen_block_branch_redirect
5704 (GEN_INT (unspec_bbr_uid++)),
5705 jump);
5706 return prev;
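/* Byte displacement range, relative to the conditional branch itself, that
   such a branch is assumed to reach without the far-branch expansion below.  */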
5709 #define CONDJUMP_MIN -252
5710 #define CONDJUMP_MAX 262
5711 struct far_branch
5713 /* A label (to be placed) in front of the jump
5714 that jumps to our ultimate destination. */
5715 rtx near_label;
5716 /* Where we are going to insert it if we cannot move the jump any farther,
5717 or the jump itself if we have picked up an existing jump. */
5718 rtx insert_place;
5719 /* The ultimate destination. */
5720 rtx far_label;
5721 struct far_branch *prev;
5722 /* If the branch has already been created, its address;
5723 else the address of its first prospective user. */
5724 int address;
5727 static void gen_far_branch (struct far_branch *);
5728 enum mdep_reorg_phase_e mdep_reorg_phase;
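/* Expand the far branch described by BP.  In insn order, the result after
   BP->insert_place is: BP->near_label, an unconditional jump (or return) to
   the far destination, an optional barrier, and a fresh label; the
   conditional branch at BP->insert_place is then inverted so that it
   branches to that fresh label, skipping over the far jump.  */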
5729 static void
5730 gen_far_branch (struct far_branch *bp)
5732 rtx insn = bp->insert_place;
5733 rtx jump;
5734 rtx label = gen_label_rtx ();
5735 int ok;
5737 emit_label_after (label, insn);
5738 if (bp->far_label)
5740 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5741 LABEL_NUSES (bp->far_label)++;
5743 else
5744 jump = emit_jump_insn_after (gen_return (), insn);
5746 /* Emit a barrier so that reorg knows that any following instructions
5747 are not reachable via a fall-through path.
5748 But don't do this when not optimizing, since we wouldn't suppress the
5749 alignment for the barrier then, and could end up with out-of-range
5750 pc-relative loads. */
5751 if (optimize)
5752 emit_barrier_after (jump);
5753 emit_label_after (bp->near_label, insn);
5755 if (bp->far_label)
5756 JUMP_LABEL (jump) = bp->far_label;
5757 else
5759 rtx pat = PATTERN (jump);
5760 gcc_assert (ANY_RETURN_P (pat));
5761 JUMP_LABEL (jump) = pat;
5764 ok = invert_jump (insn, label, 1);
5765 gcc_assert (ok);
5767 /* If we are branching around a jump (rather than a return), prevent
5768 reorg from using an insn from the jump target as the delay slot insn -
5769 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5770 and it could cause branches to go out of range. */
5771 if (bp->far_label)
5772 (emit_insn_after
5773 (gen_stuff_delay_slot
5774 (GEN_INT (unspec_bbr_uid++),
5775 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5776 insn));
5777 /* Prevent reorg from undoing our splits. */
5778 gen_block_redirect (jump, bp->address += 2, 2);
5781 /* Fix up ADDR_DIFF_VECs. */
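/* For each ADDR_DIFF_VEC, find the casesi_jump_2 (braf) that uses it, emit
   the braf's reference label immediately after that insn, and rebase the
   table on this label, which is presumably the address the braf adds the
   table offsets to.  */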
5782 void
5783 fixup_addr_diff_vecs (rtx first)
5785 rtx insn;
5787 for (insn = first; insn; insn = NEXT_INSN (insn))
5789 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5791 if (! JUMP_TABLE_DATA_P (insn)
5792 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5793 continue;
5794 pat = PATTERN (insn);
5795 vec_lab = XEXP (XEXP (pat, 0), 0);
5797 /* Search for the matching casesi_jump_2. */
5798 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5800 if (!JUMP_P (prev))
5801 continue;
5802 prevpat = PATTERN (prev);
5803 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5804 continue;
5805 x = XVECEXP (prevpat, 0, 1);
5806 if (GET_CODE (x) != USE)
5807 continue;
5808 x = XEXP (x, 0);
5809 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5810 break;
5812 /* FIXME: This is a bug in the optimizer, but it seems harmless
5813 to just avoid panicking. */
5814 if (!prev)
5815 continue;
5817 /* Emit the reference label of the braf where it belongs, right after
5818 the casesi_jump_2 (i.e. braf). */
5819 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5820 emit_label_after (braf_label, prev);
5822 /* Fix up the ADDR_DIFF_VEC to be relative
5823 to the reference address of the braf. */
5824 XEXP (XEXP (pat, 0), 0) = braf_label;
5828 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5829 a barrier. Return the base 2 logarithm of the desired alignment. */
5831 barrier_align (rtx barrier_or_label)
5833 rtx next, pat;
5835 if (! barrier_or_label)
5836 return 0;
5838 if (LABEL_P (barrier_or_label)
5839 && NEXT_INSN (barrier_or_label)
5840 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5841 return 2;
5843 if (BARRIER_P (barrier_or_label)
5844 && PREV_INSN (barrier_or_label)
5845 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5847 pat = PATTERN (PREV_INSN (barrier_or_label));
5848 /* If this is a very small table, we want to keep the alignment after
5849 the table to the minimum for proper code alignment. */
5850 return ((optimize_size
5851 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5852 <= (unsigned) 1 << (CACHE_LOG - 2)))
5853 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5856 next = next_active_insn (barrier_or_label);
5858 if (! next)
5859 return 0;
5861 pat = PATTERN (next);
5863 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5864 /* This is a barrier in front of a constant table. */
5865 return 0;
5867 if (optimize_size)
5868 return 0;
5870 if (! TARGET_SH2 || ! optimize)
5871 return align_jumps_log;
5873 /* When fixing up pcloads, a constant table might be inserted just before
5874 the basic block that ends with the barrier. Thus, we can't trust the
5875 instruction lengths before that. */
5876 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5878 /* Check if there is an immediately preceding branch to the insn beyond
5879 the barrier. We must weigh the cost of discarding useful information
5880 from the current cache line when executing this branch and there is
5881 an alignment, against that of fetching unneeded insns in front of the
5882 branch target when there is no alignment. */
5884 /* There are two delay_slot cases to consider. One is the simple case
5885 where the preceding branch is to the insn beyond the barrier (simple
5886 delay slot filling), and the other is where the preceding branch has
5887 a delay slot that is a duplicate of the insn after the barrier
5888 (fill_eager_delay_slots) and the branch is to the insn after the insn
5889 after the barrier. */
5891 int slot, credit;
5892 bool jump_to_next = false;
5894 /* Skip to the insn before the JUMP_INSN before the barrier under
5895 investigation. */
5896 rtx prev = prev_real_insn (prev_active_insn (barrier_or_label));
5898 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5899 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5900 prev = prev_real_insn (prev))
5902 jump_to_next = false;
5903 if (GET_CODE (PATTERN (prev)) == USE
5904 || GET_CODE (PATTERN (prev)) == CLOBBER)
5905 continue;
5906 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5908 prev = XVECEXP (PATTERN (prev), 0, 1);
5909 if (INSN_UID (prev) == INSN_UID (next))
5911 /* Delay slot was filled with insn at jump target. */
5912 jump_to_next = true;
5913 continue;
5917 if (slot &&
5918 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5919 slot = 0;
5920 credit -= get_attr_length (prev);
5922 if (prev && jump_to_label_p (prev))
5924 rtx x;
5925 if (jump_to_next
5926 || next_real_insn (JUMP_LABEL (prev)) == next
5927 /* If relax_delay_slots() decides NEXT was redundant
5928 with some previous instruction, it will have
5929 redirected PREV's jump to the following insn. */
5930 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5931 /* There is no upper bound on redundant instructions
5932 that might have been skipped, but we must not put an
5933 alignment where none had been before. */
5934 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5935 (INSN_P (x)
5936 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5937 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5938 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5940 rtx pat = PATTERN (prev);
5941 if (GET_CODE (pat) == PARALLEL)
5942 pat = XVECEXP (pat, 0, 0);
5943 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5944 return 0;
5949 return align_jumps_log;
5952 /* If we are inside a phony loop, almost any kind of label can turn up as the
5953 first one in the loop. Aligning a braf label causes incorrect switch
5954 destination addresses; we can detect braf labels because they are
5955 followed by a BARRIER.
5956 Applying loop alignment to small constant or switch tables is a waste
5957 of space, so we suppress this too. */
5959 sh_loop_align (rtx label)
5961 rtx next = label;
5963 if (! optimize || optimize_size)
5964 return 0;
5967 next = next_nonnote_insn (next);
5968 while (next && LABEL_P (next));
5970 if (! next
5971 || ! INSN_P (next)
5972 || recog_memoized (next) == CODE_FOR_consttable_2)
5973 return 0;
5975 return align_loops_log;
5978 /* Do a final pass over the function, just before delayed branch
5979 scheduling. */
5980 static void
5981 sh_reorg (void)
5983 rtx first, insn, mova = NULL_RTX;
5984 int num_mova;
5985 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5986 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5988 first = get_insns ();
5989 max_labelno_before_reorg = max_label_num ();
5991 /* We must split call insns before introducing `mova's. If we're
5992 optimizing, they'll have already been split. Otherwise, make
5993 sure we don't split them too late. */
5994 if (! optimize)
5995 split_all_insns_noflow ();
5997 if (TARGET_SHMEDIA)
5998 return;
6000 /* If relaxing, generate pseudo-ops to associate function calls with
6001 the symbols they call. It does no harm to not generate these
6002 pseudo-ops. However, when we can generate them, it enables the
6003 linker to potentially relax the jsr to a bsr, and eliminate the
6004 register load and, possibly, the constant pool entry. */
6006 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6007 if (TARGET_RELAX)
6009 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6010 own purposes. This works because none of the remaining passes
6011 need to look at them.
6013 ??? But it may break in the future. We should use a machine
6014 dependent REG_NOTE, or some other approach entirely. */
6015 for (insn = first; insn; insn = NEXT_INSN (insn))
6017 if (INSN_P (insn))
6019 rtx note;
6021 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6022 NULL_RTX)) != 0)
6023 remove_note (insn, note);
6027 for (insn = first; insn; insn = NEXT_INSN (insn))
6029 rtx pattern, reg, link, set, scan, dies, label;
6030 int rescan = 0, foundinsn = 0;
6032 if (CALL_P (insn))
6034 pattern = PATTERN (insn);
6036 if (GET_CODE (pattern) == PARALLEL)
6037 pattern = XVECEXP (pattern, 0, 0);
6038 if (GET_CODE (pattern) == SET)
6039 pattern = SET_SRC (pattern);
6041 if (GET_CODE (pattern) != CALL
6042 || !MEM_P (XEXP (pattern, 0)))
6043 continue;
6045 reg = XEXP (XEXP (pattern, 0), 0);
6047 else
6049 reg = sfunc_uses_reg (insn);
6050 if (! reg)
6051 continue;
6054 if (!REG_P (reg))
6055 continue;
6057 /* Try scanning backward to find where the register is set. */
6058 link = NULL;
6059 for (scan = PREV_INSN (insn);
6060 scan && !LABEL_P (scan);
6061 scan = PREV_INSN (scan))
6063 if (! INSN_P (scan))
6064 continue;
6066 if (! reg_mentioned_p (reg, scan))
6067 continue;
6069 if (noncall_uses_reg (reg, scan, &set))
6070 break;
6072 if (set)
6074 link = scan;
6075 break;
6079 if (! link)
6080 continue;
6082 /* The register is set at LINK. */
6084 /* We can only optimize the function call if the register is
6085 being set to a symbol. In theory, we could sometimes
6086 optimize calls to a constant location, but the assembler
6087 and linker do not support that at present. */
6088 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6089 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6090 continue;
6092 /* Scan forward from LINK to the place where REG dies, and
6093 make sure that the only insns which use REG are
6094 themselves function calls. */
6096 /* ??? This doesn't work for call targets that were allocated
6097 by reload, since there may not be a REG_DEAD note for the
6098 register. */
6100 dies = NULL_RTX;
6101 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6103 rtx scanset;
6105 /* Don't try to trace forward past a CODE_LABEL if we haven't
6106 seen INSN yet. Ordinarily, we will only find the setting insn
6107 if it is in the same basic block. However,
6108 cross-jumping can insert code labels in between the load and
6109 the call, and can result in situations where a single call
6110 insn may have two targets depending on where we came from. */
6112 if (LABEL_P (scan) && ! foundinsn)
6113 break;
6115 if (! INSN_P (scan))
6116 continue;
6118 /* Don't try to trace forward past a JUMP. To optimize
6119 safely, we would have to check that all the
6120 instructions at the jump destination did not use REG. */
6122 if (JUMP_P (scan))
6123 break;
6125 if (! reg_mentioned_p (reg, scan))
6126 continue;
6128 if (noncall_uses_reg (reg, scan, &scanset))
6129 break;
6131 if (scan == insn)
6132 foundinsn = 1;
6134 if (scan != insn
6135 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6137 /* There is a function call to this register other
6138 than the one we are checking. If we optimize
6139 this call, we need to rescan again below. */
6140 rescan = 1;
6143 /* ??? We shouldn't have to worry about SCANSET here.
6144 We should just be able to check for a REG_DEAD note
6145 on a function call. However, the REG_DEAD notes are
6146 apparently not dependable around libcalls; c-torture
6147 execute/920501-2 is a test case. If SCANSET is set,
6148 then this insn sets the register, so it must have
6149 died earlier. Unfortunately, this will only handle
6150 the cases in which the register is, in fact, set in a
6151 later insn. */
6153 /* ??? We shouldn't have to use FOUNDINSN here.
6154 This dates back to when we used LOG_LINKS to find
6155 the most recent insn which sets the register. */
6157 if (foundinsn
6158 && (scanset
6159 || find_reg_note (scan, REG_DEAD, reg)))
6161 dies = scan;
6162 break;
6166 if (! dies)
6168 /* Either there was a branch, or some insn used REG
6169 other than as a function call address. */
6170 continue;
6173 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6174 on the insn which sets the register, and on each call insn
6175 which uses the register. In final_prescan_insn we look for
6176 the REG_LABEL_OPERAND notes, and output the appropriate label
6177 or pseudo-op. */
6179 label = gen_label_rtx ();
6180 add_reg_note (link, REG_LABEL_OPERAND, label);
6181 add_reg_note (insn, REG_LABEL_OPERAND, label);
6182 if (rescan)
6184 scan = link;
6187 rtx reg2;
6189 scan = NEXT_INSN (scan);
6190 if (scan != insn
6191 && ((CALL_P (scan)
6192 && reg_mentioned_p (reg, scan))
6193 || ((reg2 = sfunc_uses_reg (scan))
6194 && REGNO (reg2) == REGNO (reg))))
6195 add_reg_note (scan, REG_LABEL_OPERAND, label);
6197 while (scan != dies);
6202 if (TARGET_SH2)
6203 fixup_addr_diff_vecs (first);
6205 if (optimize)
6207 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6208 shorten_branches (first);
6211 /* Scan the function looking for move instructions which have to be
6212 changed to pc-relative loads and insert the literal tables. */
6213 label_ref_list_pool = create_alloc_pool ("label references list",
6214 sizeof (struct label_ref_list_d),
6215 30);
6216 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6217 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6219 if (mova_p (insn))
6221 /* ??? basic block reordering can move a switch table dispatch
6222 below the switch table. Check if that has happened.
6223 We only have the addresses available when optimizing; but then,
6224 this check shouldn't be needed when not optimizing. */
6225 if (!untangle_mova (&num_mova, &mova, insn))
6227 insn = mova;
6228 num_mova = 0;
6231 else if (JUMP_TABLE_DATA_P (insn)
6232 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6233 && num_mova
6234 /* ??? loop invariant motion can also move a mova out of a
6235 loop. Since loop does this code motion anyway, maybe we
6236 should wrap UNSPEC_MOVA into a CONST, so that reload can
6237 move it back. */
6238 && ((num_mova > 1
6239 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6240 || (prev_nonnote_insn (insn)
6241 == XEXP (MOVA_LABELREF (mova), 0))))
6243 rtx scan;
6244 int total;
6246 num_mova--;
6248 /* Some code might have been inserted between the mova and
6249 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6250 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6251 total += get_attr_length (scan);
6253 /* The range of mova is 1020; add 4 because pc counts from the address of
6254 the second instruction after this one, and subtract 2 in case pc is 2
6255 byte aligned, giving 1020 + 4 - 2 = 1022. Possible alignment needed for the ADDR_DIFF_VEC
6256 cancels out with alignment effects of the mova itself. */
6257 if (total > 1022)
6259 /* Change the mova into a load, and restart scanning
6260 there. broken_move will then return true for mova. */
6261 fixup_mova (mova);
6262 insn = mova;
6265 if (broken_move (insn)
6266 || (NONJUMP_INSN_P (insn)
6267 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6269 rtx scan;
6270 /* Scan ahead looking for a barrier to stick the constant table
6271 behind. */
6272 rtx barrier = find_barrier (num_mova, mova, insn);
6273 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6274 int need_aligned_label = 0;
6276 if (num_mova && ! mova_p (mova))
6278 /* find_barrier had to change the first mova into a
6279 pcload; thus, we have to start with this new pcload. */
6280 insn = mova;
6281 num_mova = 0;
6283 /* Now find all the moves between the points and modify them. */
6284 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6286 if (LABEL_P (scan))
6287 last_float = 0;
6288 if (NONJUMP_INSN_P (scan)
6289 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6290 need_aligned_label = 1;
6291 if (broken_move (scan))
6293 rtx *patp = &PATTERN (scan), pat = *patp;
6294 rtx src, dst;
6295 rtx lab;
6296 rtx newsrc;
6297 enum machine_mode mode;
6299 if (GET_CODE (pat) == PARALLEL)
6300 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6301 src = SET_SRC (pat);
6302 dst = SET_DEST (pat);
6303 mode = GET_MODE (dst);
6305 if (mode == SImode && satisfies_constraint_I16 (src)
6306 && REGNO (dst) != FPUL_REG)
6308 int offset = 0;
6310 mode = HImode;
6311 while (GET_CODE (dst) == SUBREG)
6313 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6314 GET_MODE (SUBREG_REG (dst)),
6315 SUBREG_BYTE (dst),
6316 GET_MODE (dst));
6317 dst = SUBREG_REG (dst);
6319 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6321 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6323 /* This must be an insn that clobbers r0. */
6324 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6325 XVECLEN (PATTERN (scan), 0)
6326 - 1);
6327 rtx clobber = *clobberp;
6329 gcc_assert (GET_CODE (clobber) == CLOBBER
6330 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6332 if (last_float
6333 && reg_set_between_p (r0_rtx, last_float_move, scan))
6334 last_float = 0;
6335 if (last_float
6336 && TARGET_SHCOMPACT
6337 && GET_MODE_SIZE (mode) != 4
6338 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6339 last_float = 0;
6340 lab = add_constant (src, mode, last_float);
6341 if (lab)
6342 emit_insn_before (gen_mova (lab), scan);
6343 else
6345 /* There will be a REG_UNUSED note for r0 on
6346 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6347 lest reorg:mark_target_live_regs not
6348 consider r0 to be used, and we end up with a delay
6349 slot insn in front of SCAN that clobbers r0. */
6350 rtx note
6351 = find_regno_note (last_float_move, REG_UNUSED, 0);
6353 /* If we are not optimizing, then there may not be
6354 a note. */
6355 if (note)
6356 PUT_REG_NOTE_KIND (note, REG_INC);
6358 *last_float_addr = r0_inc_rtx;
6360 last_float_move = scan;
6361 last_float = src;
6362 newsrc = gen_const_mem (mode,
6363 (((TARGET_SH4 && ! TARGET_FMOVD)
6364 || REGNO (dst) == FPUL_REG)
6365 ? r0_inc_rtx
6366 : r0_rtx));
6367 last_float_addr = &XEXP (newsrc, 0);
6369 /* Remove the clobber of r0. */
6370 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6371 gen_rtx_SCRATCH (Pmode));
6373 /* This is a mova needing a label. Create it. */
6374 else if (GET_CODE (src) == UNSPEC
6375 && XINT (src, 1) == UNSPEC_MOVA
6376 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6378 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6379 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6380 newsrc = gen_rtx_UNSPEC (SImode,
6381 gen_rtvec (1, newsrc),
6382 UNSPEC_MOVA);
6384 else if (GET_CODE (src) == UNSPEC_VOLATILE
6385 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6387 newsrc = XVECEXP (src, 0, 0);
6388 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6389 INSN_CODE (scan) = -1;
6390 continue;
6392 else
6394 lab = add_constant (src, mode, 0);
6395 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6396 newsrc = gen_const_mem (mode, newsrc);
6398 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6399 INSN_CODE (scan) = -1;
6402 dump_table (need_aligned_label ? insn : 0, barrier);
6403 insn = barrier;
6406 free_alloc_pool (label_ref_list_pool);
6407 for (insn = first; insn; insn = NEXT_INSN (insn))
6408 PUT_MODE (insn, VOIDmode);
6410 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6411 INSN_ADDRESSES_FREE ();
6412 split_branches (first);
6414 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6415 also has an effect on the register that holds the address of the sfunc.
6416 Insert an extra dummy insn in front of each sfunc that pretends to
6417 use this register. */
6418 if (flag_delayed_branch)
6420 for (insn = first; insn; insn = NEXT_INSN (insn))
6422 rtx reg = sfunc_uses_reg (insn);
6424 if (! reg)
6425 continue;
6426 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6429 #if 0
6430 /* fpscr is not actually a user variable, but we pretend it is for the
6431 sake of the previous optimization passes, since we want it handled like
6432 one. However, we don't have any debugging information for it, so turn
6433 it into a non-user variable now. */
6434 if (TARGET_SH4)
6435 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6436 #endif
6437 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6440 /* Return the UID of the insn that follows the specified label. */
6442 get_dest_uid (rtx label, int max_uid)
6444 rtx dest = next_real_insn (label);
6445 int dest_uid;
6446 if (! dest)
6447 /* This can happen for an undefined label. */
6448 return 0;
6449 dest_uid = INSN_UID (dest);
6450 /* If this is a newly created branch redirection blocking instruction,
6451 we cannot index the branch_uid or insn_addresses arrays with its
6452 uid. But then, we won't need to, because the actual destination is
6453 the following branch. */
6454 while (dest_uid >= max_uid)
6456 dest = NEXT_INSN (dest);
6457 dest_uid = INSN_UID (dest);
6459 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6460 return 0;
6461 return dest_uid;
6464 /* Split condbranches that are out of range. Also add clobbers for
6465 scratch registers that are needed in far jumps.
6466 We do this before delay slot scheduling, so that it can take our
6467 newly created instructions into account. It also allows us to
6468 find branches with common targets more easily. */
6469 static void
6470 split_branches (rtx first)
6472 rtx insn;
6473 struct far_branch **uid_branch, *far_branch_list = 0;
6474 int max_uid = get_max_uid ();
6475 int ok;
6477 /* Find out which branches are out of range. */
6478 shorten_branches (first);
6480 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6481 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6483 for (insn = first; insn; insn = NEXT_INSN (insn))
6484 if (! INSN_P (insn))
6485 continue;
6486 else if (INSN_DELETED_P (insn))
6488 /* Shorten_branches would split this instruction again,
6489 so transform it into a note. */
6490 SET_INSN_DELETED (insn);
6492 else if (JUMP_P (insn))
6494 enum attr_type type = get_attr_type (insn);
6495 if (type == TYPE_CBRANCH)
6497 rtx next, beyond;
6499 if (get_attr_length (insn) > 4)
6501 rtx src = SET_SRC (PATTERN (insn));
6502 rtx olabel = XEXP (XEXP (src, 1), 0);
6503 int addr = INSN_ADDRESSES (INSN_UID (insn));
6504 rtx label = 0;
6505 int dest_uid = get_dest_uid (olabel, max_uid);
6506 struct far_branch *bp = uid_branch[dest_uid];
6508 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6509 the label if the LABEL_NUSES count drops to zero. There is
6510 always a jump_optimize pass that sets these values, but it
6511 proceeds to delete unreferenced code, and then if not
6512 optimizing, to un-delete the deleted instructions, thus
6513 leaving labels with use counts that are too low. */
6514 if (! optimize)
6516 JUMP_LABEL (insn) = olabel;
6517 LABEL_NUSES (olabel)++;
6519 if (! bp)
6521 bp = (struct far_branch *) alloca (sizeof *bp);
6522 uid_branch[dest_uid] = bp;
6523 bp->prev = far_branch_list;
6524 far_branch_list = bp;
6525 bp->far_label
6526 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6527 LABEL_NUSES (bp->far_label)++;
6529 else
6531 label = bp->near_label;
6532 if (! label && bp->address - addr >= CONDJUMP_MIN)
6534 rtx block = bp->insert_place;
6536 if (GET_CODE (PATTERN (block)) == RETURN)
6537 block = PREV_INSN (block);
6538 else
6539 block = gen_block_redirect (block,
6540 bp->address, 2);
6541 label = emit_label_after (gen_label_rtx (),
6542 PREV_INSN (block));
6543 bp->near_label = label;
6545 else if (label && ! NEXT_INSN (label))
6547 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6548 bp->insert_place = insn;
6549 else
6550 gen_far_branch (bp);
6553 if (! label
6554 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6556 bp->near_label = label = gen_label_rtx ();
6557 bp->insert_place = insn;
6558 bp->address = addr;
6560 ok = redirect_jump (insn, label, 0);
6561 gcc_assert (ok);
6563 else
6565 /* get_attr_length (insn) == 2 */
6566 /* Check if we have a pattern where reorg wants to redirect
6567 the branch to a label from an unconditional branch that
6568 is too far away. */
6569 /* We can't use JUMP_LABEL here because it might be undefined
6570 when not optimizing. */
6571 /* A syntax error might cause beyond to be NULL_RTX. */
6572 beyond
6573 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6574 0));
6576 if (beyond
6577 && (JUMP_P (beyond)
6578 || ((beyond = next_active_insn (beyond))
6579 && JUMP_P (beyond)))
6580 && GET_CODE (PATTERN (beyond)) == SET
6581 && recog_memoized (beyond) == CODE_FOR_jump_compact
6582 && ((INSN_ADDRESSES
6583 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6584 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6585 > 252 + 258 + 2))
6586 gen_block_redirect (beyond,
6587 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6590 next = next_active_insn (insn);
6592 if (next
6593 && (JUMP_P (next)
6594 || ((next = next_active_insn (next))
6595 && JUMP_P (next)))
6596 && GET_CODE (PATTERN (next)) == SET
6597 && recog_memoized (next) == CODE_FOR_jump_compact
6598 && ((INSN_ADDRESSES
6599 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6600 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6601 > 252 + 258 + 2))
6602 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6604 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6606 int addr = INSN_ADDRESSES (INSN_UID (insn));
6607 rtx far_label = 0;
6608 int dest_uid = 0;
6609 struct far_branch *bp;
6611 if (type == TYPE_JUMP)
6613 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6614 dest_uid = get_dest_uid (far_label, max_uid);
6615 if (! dest_uid)
6617 /* Parse errors can lead to labels outside
6618 the insn stream. */
6619 if (! NEXT_INSN (far_label))
6620 continue;
6622 if (! optimize)
6624 JUMP_LABEL (insn) = far_label;
6625 LABEL_NUSES (far_label)++;
6627 redirect_jump (insn, ret_rtx, 1);
6628 far_label = 0;
6631 bp = uid_branch[dest_uid];
6632 if (! bp)
6634 bp = (struct far_branch *) alloca (sizeof *bp);
6635 uid_branch[dest_uid] = bp;
6636 bp->prev = far_branch_list;
6637 far_branch_list = bp;
6638 bp->near_label = 0;
6639 bp->far_label = far_label;
6640 if (far_label)
6641 LABEL_NUSES (far_label)++;
6643 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6644 if (addr - bp->address <= CONDJUMP_MAX)
6645 emit_label_after (bp->near_label, PREV_INSN (insn));
6646 else
6648 gen_far_branch (bp);
6649 bp->near_label = 0;
6651 else
6652 bp->near_label = 0;
6653 bp->address = addr;
6654 bp->insert_place = insn;
6655 if (! far_label)
6656 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6657 else
6658 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6661 /* Generate all pending far branches,
6662 and free our references to the far labels. */
6663 while (far_branch_list)
6665 if (far_branch_list->near_label
6666 && ! NEXT_INSN (far_branch_list->near_label))
6667 gen_far_branch (far_branch_list);
6668 if (optimize
6669 && far_branch_list->far_label
6670 && ! --LABEL_NUSES (far_branch_list->far_label))
6671 delete_insn (far_branch_list->far_label);
6672 far_branch_list = far_branch_list->prev;
6675 /* Instruction length information is no longer valid due to the new
6676 instructions that have been generated. */
6677 init_insn_lengths ();
6680 /* Dump out instruction addresses, which is useful for debugging the
6681 constant pool table stuff.
6683 If relaxing, output the label and pseudo-ops used to link together
6684 calls and the instruction which set the registers.
6686 ??? The addresses printed by this routine for insns are nonsense for
6687 insns which are inside of a sequence where none of the inner insns have
6688 variable length. This is because the second pass of shorten_branches
6689 does not bother to update them. */
6690 void
6691 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6692 int noperands ATTRIBUTE_UNUSED)
6694 if (TARGET_DUMPISIZE)
6695 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6697 if (TARGET_RELAX)
6699 rtx note;
6701 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6702 if (note)
6704 rtx pattern;
6706 pattern = PATTERN (insn);
6707 if (GET_CODE (pattern) == PARALLEL)
6708 pattern = XVECEXP (pattern, 0, 0);
6709 switch (GET_CODE (pattern))
6711 case SET:
6712 if (GET_CODE (SET_SRC (pattern)) != CALL
6713 && get_attr_type (insn) != TYPE_SFUNC)
6715 targetm.asm_out.internal_label
6716 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6717 break;
6719 /* else FALLTHROUGH */
6720 case CALL:
6721 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6722 CODE_LABEL_NUMBER (XEXP (note, 0)));
6723 break;
6725 default:
6726 gcc_unreachable ();
6732 /* Dump out any constants accumulated in the final pass. These will
6733 only be labels. */
6734 const char *
6735 output_jump_label_table (void)
6737 int i;
6739 if (pool_size)
6741 fprintf (asm_out_file, "\t.align 2\n");
6742 for (i = 0; i < pool_size; i++)
6744 pool_node *p = &pool_vector[i];
6746 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6747 CODE_LABEL_NUMBER (p->label));
6748 output_asm_insn (".long %O0", &p->value);
6750 pool_size = 0;
6753 return "";
6756 /* A full frame looks like:
6758 arg-5
6759 arg-4
6760 [ if current_function_anonymous_args
6761 arg-3
6762 arg-2
6763 arg-1
6764 arg-0 ]
6765 saved-fp
6766 saved-r10
6767 saved-r11
6768 saved-r12
6769 saved-pr
6770 local-n
6772 local-1
6773 local-0 <- fp points here.
6775 Number of bytes pushed for anonymous args, used to pass information
6776 between expand_prologue and expand_epilogue.
6778 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6779 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6780 for an epilogue and a negative value means that it's for a sibcall
6781 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6782 all the registers that are about to be restored, and hence dead. */
6783 static void
6784 output_stack_adjust (int size, rtx reg, int epilogue_p,
6785 HARD_REG_SET *live_regs_mask, bool frame_p)
6787 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6788 if (size)
6790 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6792 /* This test is bogus, as output_stack_adjust is used to re-align the
6793 stack. */
6794 #if 0
6795 gcc_assert (!(size % align));
6796 #endif
6798 if (CONST_OK_FOR_ADD (size))
6799 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6800 /* Try to do it with two partial adjustments; however, we must make
6801 sure that the stack is properly aligned at all times, in case
6802 an interrupt occurs between the two partial adjustments. */
6803 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6804 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6806 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6807 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6809 else
6811 rtx const_reg;
6812 rtx insn;
6813 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6814 int i;
6816 /* If TEMP is invalid, we could temporarily save a general
6817 register to MACL. However, there is currently no need
6818 to handle this case, so just die when we see it. */
6819 if (epilogue_p < 0
6820 || current_function_interrupt
6821 || ! call_really_used_regs[temp] || fixed_regs[temp])
6822 temp = -1;
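/* Otherwise, try to scavenge a call-clobbered register that is not needed
   for the return value, the EH return data registers, incoming argument
   registers, or the static chain.  */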
6823 if (temp < 0 && ! current_function_interrupt
6824 && (TARGET_SHMEDIA || epilogue_p >= 0))
6826 HARD_REG_SET temps;
6827 COPY_HARD_REG_SET (temps, call_used_reg_set);
6828 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6829 if (epilogue_p > 0)
6831 int nreg = 0;
6832 if (crtl->return_rtx)
6834 enum machine_mode mode;
6835 mode = GET_MODE (crtl->return_rtx);
6836 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6837 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6839 for (i = 0; i < nreg; i++)
6840 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6841 if (crtl->calls_eh_return)
6843 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6844 for (i = 0; i <= 3; i++)
6845 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6848 if (TARGET_SHMEDIA && epilogue_p < 0)
6849 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6850 CLEAR_HARD_REG_BIT (temps, i);
6851 if (epilogue_p <= 0)
6853 for (i = FIRST_PARM_REG;
6854 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6855 CLEAR_HARD_REG_BIT (temps, i);
6856 if (cfun->static_chain_decl != NULL)
6857 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6859 temp = scavenge_reg (&temps);
6861 if (temp < 0 && live_regs_mask)
6863 HARD_REG_SET temps;
6865 COPY_HARD_REG_SET (temps, *live_regs_mask);
6866 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6867 temp = scavenge_reg (&temps);
6869 if (temp < 0)
6871 rtx adj_reg, tmp_reg, mem;
6873 /* If we reached here, the most likely case is the (sibcall)
6874 epilogue for non-SHmedia. Put a special push/pop sequence
6875 for such a case as a last resort. This looks lengthy, but
6876 should not be a problem because it seems to be very
6877 rare. */
6879 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6882 /* ??? There is still the slight possibility that r4 or
6883 r5 have been reserved as fixed registers or assigned
6884 as global registers, and they change during an
6885 interrupt. There are possible ways to handle this:
6887 - If we are adjusting the frame pointer (r14), we can do
6888 with a single temp register and an ordinary push / pop
6889 on the stack.
6890 - Grab any call-used or call-saved registers (i.e. not
6891 fixed or globals) for the temps we need. We might
6892 also grab r14 if we are adjusting the stack pointer.
6893 If we can't find enough available registers, issue
6894 a diagnostic and die - the user must have reserved
6895 way too many registers.
6896 But since all this is rather unlikely to happen and
6897 would require extra testing, we just die if r4 / r5
6898 are not available. */
6899 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6900 && !global_regs[4] && !global_regs[5]);
6902 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6903 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6904 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6905 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6906 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6907 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6908 emit_move_insn (mem, tmp_reg);
6909 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6910 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6911 emit_move_insn (mem, tmp_reg);
6912 emit_move_insn (reg, adj_reg);
6913 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6914 emit_move_insn (adj_reg, mem);
6915 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6916 emit_move_insn (tmp_reg, mem);
6917 /* Tell flow the insns that pop r4/r5 aren't dead. */
6918 emit_use (tmp_reg);
6919 emit_use (adj_reg);
6920 return;
6922 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6924 /* If SIZE is negative, subtract the positive value.
6925 This sometimes allows a constant pool entry to be shared
6926 between prologue and epilogue code. */
6927 if (size < 0)
6929 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6930 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6932 else
6934 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6935 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
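/* The adjustment went through a scratch register; describe the net effect
   to the DWARF machinery as a plain reg = reg + size.  */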
6937 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6938 gen_rtx_SET (VOIDmode, reg,
6939 gen_rtx_PLUS (SImode, reg,
6940 GEN_INT (size))));
6945 /* Emit the specified insn and mark it as frame related.
6946 FIXME: Rename this to emit_frame_insn. */
6947 static rtx_insn *
6948 frame_insn (rtx x)
6950 rtx_insn *insn = emit_insn (x);
6951 RTX_FRAME_RELATED_P (insn) = 1;
6952 return insn;
6955 /* Output RTL to push register RN onto the stack. */
6956 static rtx
6957 push (int rn)
6959 rtx x;
6960 if (rn == FPUL_REG)
6961 x = gen_push_fpul ();
6962 else if (rn == FPSCR_REG)
6963 x = gen_push_fpscr ();
6964 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6965 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6967 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6968 return NULL_RTX;
6969 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6971 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6972 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6973 else
6974 x = gen_push (gen_rtx_REG (SImode, rn));
6976 x = frame_insn (x);
6977 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6978 return x;
6981 /* Output RTL to pop register RN from the stack. */
6982 static void
6983 pop (int rn)
6985 rtx x, sp_reg, reg;
6986 if (rn == FPUL_REG)
6987 x = gen_pop_fpul ();
6988 else if (rn == FPSCR_REG)
6989 x = gen_pop_fpscr ();
6990 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6991 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6993 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6994 return;
6995 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6997 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6998 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6999 else
7000 x = gen_pop (gen_rtx_REG (SImode, rn));
7002 x = emit_insn (x);
7004 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7005 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7006 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7007 : SET_DEST (PATTERN (x)));
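/* Attach CFA notes so that the DWARF unwinder sees both the register
   restore and the matching stack pointer adjustment.  */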
7008 add_reg_note (x, REG_CFA_RESTORE, reg);
7009 add_reg_note (x, REG_CFA_ADJUST_CFA,
7010 gen_rtx_SET (SImode, sp_reg,
7011 plus_constant (SImode, sp_reg,
7012 GET_MODE_SIZE (GET_MODE (reg)))));
7013 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7014 RTX_FRAME_RELATED_P (x) = 1;
7017 /* Generate code to push the regs specified in the mask. */
7018 static void
7019 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7021 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7022 int skip_fpscr = 0;
7024 /* Push PR last; this gives better latencies after the prologue, and provides
7025 candidates for the return delay slot when there are no general
7026 registers pushed. */
7027 for (; i < FIRST_PSEUDO_REGISTER; i++)
7029 /* If this is an interrupt handler, and the SZ bit varies,
7030 and we have to push any floating point register, we need
7031 to switch to the correct precision first. */
7032 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7033 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7035 HARD_REG_SET unsaved;
7037 push (FPSCR_REG);
7038 COMPL_HARD_REG_SET (unsaved, *mask);
7039 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7040 skip_fpscr = 1;
7042 if (i != PR_REG
7043 && (i != FPSCR_REG || ! skip_fpscr)
7044 && TEST_HARD_REG_BIT (*mask, i))
7046 /* If the ISR has the RESBANK attribute assigned, don't push any of
7047 the following registers - R0-R14, MACH, MACL and GBR. */
7048 if (! (sh_cfun_resbank_handler_p ()
7049 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7050 || i == MACH_REG
7051 || i == MACL_REG
7052 || i == GBR_REG)))
7053 push (i);
7057 /* Push banked registers last to improve delay slot opportunities. */
7058 if (interrupt_handler)
7060 bool use_movml = false;
7062 if (TARGET_SH2A)
7064 unsigned int count = 0;
7066 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7067 if (TEST_HARD_REG_BIT (*mask, i))
7068 count++;
7069 else
7070 break;
7072 /* Use movml when all banked registers are pushed. */
7073 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7074 use_movml = true;
7077 if (sh_cfun_resbank_handler_p ())
7078 ; /* Do nothing. */
7079 else if (use_movml)
7081 rtx x, mem, reg, set;
7082 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7084 /* We must avoid scheduling the multiple-store insn with other
7085 insns. */
7086 emit_insn (gen_blockage ());
7087 x = gen_movml_push_banked (sp_reg);
7088 x = frame_insn (x);
7089 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7091 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7092 reg = gen_rtx_REG (SImode, i);
7093 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7096 set = gen_rtx_SET (SImode, sp_reg,
7097 plus_constant (Pmode, sp_reg, - 32));
7098 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7099 emit_insn (gen_blockage ());
7101 else
7102 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7103 if (TEST_HARD_REG_BIT (*mask, i))
7104 push (i);
7107 /* Don't push the PR register for an ISR with the RESBANK attribute assigned. */
7108 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7109 push (PR_REG);
7112 /* Calculate how much extra space is needed to save all callee-saved
7113 target registers.
7114 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7115 static int
7116 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7118 int reg;
7119 int stack_space = 0;
7120 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7122 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7123 if ((! call_really_used_regs[reg] || interrupt_handler)
7124 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7125 /* Leave space to save this target register on the stack,
7126 in case target register allocation wants to use it. */
7127 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7128 return stack_space;
7131 /* Decide whether we should reserve space for callee-save target registers,
7132 in case target register allocation wants to use them. REGS_SAVED is
7133 the space, in bytes, that is already required for register saves.
7134 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7135 static int
7136 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7137 HARD_REG_SET *live_regs_mask)
7139 if (optimize_size)
7140 return 0;
7141 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7144 /* Decide how much space to reserve for callee-save target registers
7145 in case target register allocation wants to use them.
7146 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7147 static int
7148 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7150 if (shmedia_space_reserved_for_target_registers)
7151 return shmedia_target_regs_stack_space (live_regs_mask);
7152 else
7153 return 0;
7156 /* Work out the registers which need to be saved, both as a mask and a
7157 count of saved words. Return the count.
7159 If doing a pragma interrupt function, then push all regs used by the
7160 function, and if we call another function (we can tell by looking at PR),
7161 make sure that all the regs it clobbers are safe too. */
7162 static int
7163 calc_live_regs (HARD_REG_SET *live_regs_mask)
7165 unsigned int reg;
7166 int count;
7167 tree attrs;
7168 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7169 bool nosave_low_regs;
7170 int pr_live, has_call;
7172 attrs = DECL_ATTRIBUTES (current_function_decl);
7173 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7174 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7175 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7176 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7178 CLEAR_HARD_REG_SET (*live_regs_mask);
7179 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7180 && df_regs_ever_live_p (FPSCR_REG))
7181 target_flags &= ~MASK_FPU_SINGLE;
7182 /* If switching to double mode would eliminate a lot of saves, do that. */
7183 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7184 && TARGET_FPU_SINGLE)
7185 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7186 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7187 && (! call_really_used_regs[reg]
7188 || interrupt_handler)
7189 && ++count > 2)
7191 target_flags &= ~MASK_FPU_SINGLE;
7192 break;
7194 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7195 knows how to use it. That means the pseudo originally allocated for
7196 the initial value can become the PR_MEDIA_REG hard register, as seen for
7197 execute/20010122-1.c:test9. */
7198 if (TARGET_SHMEDIA)
7199 /* ??? this function is called from initial_elimination_offset, hence we
7200 can't use the result of sh_media_register_for_return here. */
7201 pr_live = sh_pr_n_sets ();
7202 else
7204 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7205 pr_live = (pr_initial
7206 ? (!REG_P (pr_initial)
7207 || REGNO (pr_initial) != (PR_REG))
7208 : df_regs_ever_live_p (PR_REG));
7209 /* For SHcompact, if not optimizing, we end up with a memory reference
7210 using the return address pointer for __builtin_return_address even
7211 though there is no actual need to put the PR register on the stack. */
7212 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7214 /* Force PR to be live if the prologue has to call the SHmedia
7215 argument decoder or register saver. */
7216 if (TARGET_SHCOMPACT
7217 && ((crtl->args.info.call_cookie
7218 & ~ CALL_COOKIE_RET_TRAMP (1))
7219 || crtl->saves_all_registers))
7220 pr_live = 1;
7221 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7222 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7224 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7225 ? pr_live
7226 : interrupt_handler
7227 ? (/* Need to save all the regs ever live. */
7228 (df_regs_ever_live_p (reg)
7229 || (call_really_used_regs[reg]
7230 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7231 || reg == PIC_OFFSET_TABLE_REGNUM)
7232 && has_call)
7233 || (TARGET_SHMEDIA && has_call
7234 && REGISTER_NATURAL_MODE (reg) == SImode
7235 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7236 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7237 && reg != RETURN_ADDRESS_POINTER_REGNUM
7238 && reg != T_REG && reg != GBR_REG
7239 /* Push fpscr only on targets which have an FPU. */
7240 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7241 : (/* Only push those regs which are used and need to be saved. */
7242 (TARGET_SHCOMPACT
7243 && flag_pic
7244 && crtl->args.info.call_cookie
7245 && reg == PIC_OFFSET_TABLE_REGNUM)
7246 || (df_regs_ever_live_p (reg)
7247 && ((!call_really_used_regs[reg]
7248 && !(reg != PIC_OFFSET_TABLE_REGNUM
7249 && fixed_regs[reg] && call_used_regs[reg]))
7250 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7251 || (crtl->calls_eh_return
7252 && (reg == EH_RETURN_DATA_REGNO (0)
7253 || reg == EH_RETURN_DATA_REGNO (1)
7254 || reg == EH_RETURN_DATA_REGNO (2)
7255 || reg == EH_RETURN_DATA_REGNO (3)))
7256 || ((reg == MACL_REG || reg == MACH_REG)
7257 && df_regs_ever_live_p (reg)
7258 && sh_cfun_attr_renesas_p ())
7261 SET_HARD_REG_BIT (*live_regs_mask, reg);
7262 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7264 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7265 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7267 if (FP_REGISTER_P (reg))
7269 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7271 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7272 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7275 else if (XD_REGISTER_P (reg))
7277 /* Must switch to double mode to access these registers. */
7278 target_flags &= ~MASK_FPU_SINGLE;
7282 if (nosave_low_regs && reg == R8_REG)
7283 break;
7285 /* If we have a target register optimization pass after prologue / epilogue
7286 threading, we need to assume all target registers will be live even if
7287 they aren't now. */
7288 if (flag_branch_target_load_optimize2
7289 && TARGET_SAVE_ALL_TARGET_REGS
7290 && shmedia_space_reserved_for_target_registers)
7291 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7292 if ((! call_really_used_regs[reg] || interrupt_handler)
7293 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7295 SET_HARD_REG_BIT (*live_regs_mask, reg);
7296 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7298 /* If this is an interrupt handler, we don't have any call-clobbered
7299 registers we can conveniently use for target register save/restore.
7300 Make sure we save at least one general purpose register when we need
7301 to save target registers. */
7302 if (interrupt_handler
7303 && hard_reg_set_intersect_p (*live_regs_mask,
7304 reg_class_contents[TARGET_REGS])
7305 && ! hard_reg_set_intersect_p (*live_regs_mask,
7306 reg_class_contents[GENERAL_REGS]))
7308 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7309 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7312 return count;
7315 /* Code to generate prologue and epilogue sequences */
7317 /* PUSHED is the number of bytes that are being pushed on the
7318 stack for register saves. Return the frame size, padded
7319 appropriately so that the stack stays properly aligned. */
7320 static HOST_WIDE_INT
7321 rounded_frame_size (int pushed)
7323 HOST_WIDE_INT size = get_frame_size ();
7324 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7326 if (ACCUMULATE_OUTGOING_ARGS)
7327 size += crtl->outgoing_args_size;
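/* Round SIZE + PUSHED up to a multiple of ALIGN, then subtract PUSHED
   again; e.g. size 10, pushed 4, align 8 gives ((10 + 4 + 7) & -8) - 4 = 12,
   so the total of frame and pushed bytes stays properly aligned.  */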
7329 return ((size + pushed + align - 1) & -align) - pushed;
7332 /* Choose a call-clobbered target-branch register that remains
7333 unchanged along the whole function. We set it up as the return
7334 value in the prologue. */
7336 sh_media_register_for_return (void)
7338 int regno;
7339 int tr0_used;
7341 if (! crtl->is_leaf)
7342 return -1;
7343 if (lookup_attribute ("interrupt_handler",
7344 DECL_ATTRIBUTES (current_function_decl)))
7345 return -1;
7346 if (sh_cfun_interrupt_handler_p ())
7347 return -1;
7349 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7351 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7352 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7353 return regno;
7355 return -1;
7358 /* The maximum registers we need to save are:
7359 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7360 - 32 floating point registers (for each pair, we save none,
7361 one single precision value, or a double precision value).
7362 - 8 target registers
7363 - add 1 entry for a delimiter. */
7364 #define MAX_SAVED_REGS (62+32+8)
7366 typedef struct save_entry_s
7368 unsigned char reg;
7369 unsigned char mode;
7370 short offset;
7371 } save_entry;
7373 #define MAX_TEMPS 4
7375 /* There will be a delimiter entry with VOIDmode both at the start and the
7376 end of a filled in schedule. The end delimiter has the offset of the
7377 save with the smallest (i.e. most negative) offset. */
7378 typedef struct save_schedule_s
7380 save_entry entries[MAX_SAVED_REGS + 2];
7381 int temps[MAX_TEMPS+1];
7382 } save_schedule;
7384 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7385 use reverse order. Returns the last entry written to (not counting
7386 the delimiter). OFFSET_BASE is a number to be added to all offset
7387 entries. */
7388 static save_entry *
7389 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7390 int offset_base)
7392 int align, i;
7393 save_entry *entry = schedule->entries;
7394 int tmpx = 0;
7395 int offset;
7397 if (! current_function_interrupt)
7398 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7399 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7400 && ! FUNCTION_ARG_REGNO_P (i)
7401 && i != FIRST_RET_REG
7402 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7403 && ! (crtl->calls_eh_return
7404 && (i == EH_RETURN_STACKADJ_REGNO
7405 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7406 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7407 schedule->temps[tmpx++] = i;
7408 entry->reg = -1;
7409 entry->mode = VOIDmode;
7410 entry->offset = offset_base;
7411 entry++;
7412 /* We loop twice: first, we save 8-byte aligned registers in the
7413 higher addresses, which are known to be aligned. Then, we
7414 proceed to saving 32-bit registers that don't need 8-byte
7415 alignment.
7416 If this is an interrupt function, all registers that need saving
7417 need to be saved in full. Moreover, we need to postpone saving
7418 target registers till we have saved some general purpose registers
7419 we can then use as scratch registers. */
7420 offset = offset_base;
7421 for (align = 1; align >= 0; align--)
7423 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7424 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7426 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7427 int reg = i;
7429 if (current_function_interrupt)
7431 if (TARGET_REGISTER_P (i))
7432 continue;
7433 if (GENERAL_REGISTER_P (i))
7434 mode = DImode;
7436 if (mode == SFmode && (i % 2) == 1
7437 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7438 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7440 mode = DFmode;
7441 i--;
7442 reg--;
7445 /* If we're doing the aligned pass and this is not aligned,
7446 or we're doing the unaligned pass and this is aligned,
7447 skip it. */
7448 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7449 != align)
7450 continue;
7452 if (current_function_interrupt
7453 && GENERAL_REGISTER_P (i)
7454 && tmpx < MAX_TEMPS)
7455 schedule->temps[tmpx++] = i;
7457 offset -= GET_MODE_SIZE (mode);
7458 entry->reg = i;
7459 entry->mode = mode;
7460 entry->offset = offset;
7461 entry++;
7463 if (align && current_function_interrupt)
7464 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7465 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7467 offset -= GET_MODE_SIZE (DImode);
7468 entry->reg = i;
7469 entry->mode = DImode;
7470 entry->offset = offset;
7471 entry++;
7474 entry->reg = -1;
7475 entry->mode = VOIDmode;
7476 entry->offset = offset;
7477 schedule->temps[tmpx] = -1;
7478 return entry - 1;
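/* A minimal usage sketch (details elided; this mirrors the loops in
   sh_expand_prologue and sh_expand_epilogue below):

     save_schedule schedule;
     save_entry *entry;

     sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
     for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
       ... emit a store of register entry->reg, in mode entry->mode,
           at stack pointer + entry->offset ...

     entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
     for (; entry->mode != VOIDmode; entry--)
       ... emit the matching load back from the slot ...  */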
7481 /* Expand code for the function prologue. */
7482 void
7483 sh_expand_prologue (void)
7485 HARD_REG_SET live_regs_mask;
7486 int d, i;
7487 int d_rounding = 0;
7488 int save_flags = target_flags;
7489 int pretend_args;
7490 int stack_usage;
7491 tree sp_switch_attr
7492 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7494 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7496 /* We have pretend args if we had an object sent partially in registers
7497 and partially on the stack, e.g. a large structure. */
7498 pretend_args = crtl->args.pretend_args_size;
7499 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7500 && (NPARM_REGS(SImode)
7501 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7502 pretend_args = 0;
7504 output_stack_adjust (-pretend_args
7505 - crtl->args.info.stack_regs * 8,
7506 stack_pointer_rtx, 0, NULL, true);
7507 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7509 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7510 /* We're going to use the PIC register to load the address of the
7511 incoming-argument decoder and/or of the return trampoline from
7512 the GOT, so make sure the PIC register is preserved and
7513 initialized. */
7514 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7516 if (TARGET_SHCOMPACT
7517 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7519 int reg;
7521 /* First, make all registers with incoming arguments that will
7522 be pushed onto the stack live, so that register renaming
7523 doesn't overwrite them. */
7524 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7525 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7526 >= NPARM_REGS (SImode) - reg)
7527 for (; reg < NPARM_REGS (SImode); reg++)
7528 emit_insn (gen_shcompact_preserve_incoming_args
7529 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7530 else if (CALL_COOKIE_INT_REG_GET
7531 (crtl->args.info.call_cookie, reg) == 1)
7532 emit_insn (gen_shcompact_preserve_incoming_args
7533 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7535 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7536 stack_pointer_rtx);
7537 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7538 GEN_INT (crtl->args.info.call_cookie));
7539 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7540 gen_rtx_REG (SImode, R0_REG));
7542 else if (TARGET_SHMEDIA)
7544 int tr = sh_media_register_for_return ();
7546 if (tr >= 0)
7547 emit_move_insn (gen_rtx_REG (DImode, tr),
7548 gen_rtx_REG (DImode, PR_MEDIA_REG));
7551 /* Emit the code for SETUP_VARARGS. */
7552 if (cfun->stdarg)
7554 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7556 /* Push arg regs as if they'd been provided by the caller on the stack. */
7557 for (i = 0; i < NPARM_REGS(SImode); i++)
7559 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7561 if (i >= (NPARM_REGS(SImode)
7562 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7564 break;
7565 push (rn);
7566 stack_usage += GET_MODE_SIZE (SImode);
7571 /* If we're supposed to switch stacks at function entry, do so now. */
7572 if (sp_switch_attr)
7574 rtx lab, newsrc;
7575 /* The argument specifies a variable holding the address of the
7576 stack the interrupt function should switch to/from at entry/exit. */
7577 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7578 const char *s
7579 = ggc_strdup (TREE_STRING_POINTER (arg));
7580 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7582 lab = add_constant (sp_switch, SImode, 0);
7583 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7585 emit_insn (gen_sp_switch_1 (newsrc));
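/* For reference (illustrative only; the names are made up): the sp_switch
   handling above is triggered by source of roughly this shape, where the
   attribute argument is a string naming a global variable that holds the
   address of the alternate stack:

     void *alt_stack;

     void my_isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));  */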
7588 d = calc_live_regs (&live_regs_mask);
7589 /* ??? Maybe we could save some switching if we can move a mode switch
7590 that already happens to be at the function start into the prologue. */
7591 if (target_flags != save_flags && ! current_function_interrupt)
7592 emit_insn (gen_toggle_sz ());
7594 if (TARGET_SH5)
7596 int offset_base, offset;
7597 rtx r0 = NULL_RTX;
7598 int offset_in_r0 = -1;
7599 int sp_in_r0 = 0;
7600 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7601 int total_size, save_size;
7602 save_schedule schedule;
7603 save_entry *entry;
7604 int *tmp_pnt;
7606 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7607 && ! current_function_interrupt)
7608 r0 = gen_rtx_REG (Pmode, R0_REG);
7610 /* D is the actual number of bytes that we need for saving registers;
7611 however, in initial_elimination_offset we have committed to using
7612 an additional TREGS_SPACE amount of bytes - in order to keep both
7613 addresses to arguments supplied by the caller and local variables
7614 valid, we must keep this gap. Place it between the incoming
7615 arguments and the actually saved registers in a bid to optimize
7616 locality of reference. */
7617 total_size = d + tregs_space;
7618 total_size += rounded_frame_size (total_size);
7619 save_size = total_size - rounded_frame_size (d);
7620 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7621 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7622 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7624 /* If adjusting the stack in a single step costs nothing extra, do so.
7625 I.e. either if a single addi is enough, or we need a movi anyway,
7626 and we don't exceed the maximum offset range (the test for the
7627 latter is conservative for simplicity). */
7628 if (TARGET_SHMEDIA
7629 && (CONST_OK_FOR_I10 (-total_size)
7630 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7631 && total_size <= 2044)))
7632 d_rounding = total_size - save_size;
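/* Worked example (illustrative numbers, SHmedia case above): if
   save_size is 40 and total_size is 480, then CONST_OK_FOR_I10 (-480)
   holds and d_rounding becomes 480 - 40 = 440.  The register-save
   adjustment below then subtracts the whole 480 bytes in one addi, and
   in this case the later adjustment of -rounded_frame_size (d)
   + d_rounding works out to zero.  */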
7634 offset_base = d + d_rounding;
7636 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7637 0, NULL, true);
7638 stack_usage += save_size + d_rounding;
7640 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7641 tmp_pnt = schedule.temps;
7642 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7644 enum machine_mode mode = (enum machine_mode) entry->mode;
7645 unsigned int reg = entry->reg;
7646 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7647 rtx orig_reg_rtx;
7649 offset = entry->offset;
7651 reg_rtx = gen_rtx_REG (mode, reg);
7653 mem_rtx = gen_frame_mem (mode,
7654 gen_rtx_PLUS (Pmode,
7655 stack_pointer_rtx,
7656 GEN_INT (offset)));
7658 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7660 gcc_assert (r0);
7661 mem_rtx = NULL_RTX;
7664 if (HAVE_PRE_DECREMENT
7665 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7666 || mem_rtx == NULL_RTX
7667 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7669 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7671 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7672 pre_dec = NULL_RTX;
7673 else
7675 mem_rtx = NULL_RTX;
7676 offset += GET_MODE_SIZE (mode);
7680 if (mem_rtx != NULL_RTX)
7681 goto addr_ok;
7683 if (offset_in_r0 == -1)
7685 emit_move_insn (r0, GEN_INT (offset));
7686 offset_in_r0 = offset;
7688 else if (offset != offset_in_r0)
7690 emit_move_insn (r0,
7691 gen_rtx_PLUS
7692 (Pmode, r0,
7693 GEN_INT (offset - offset_in_r0)));
7694 offset_in_r0 += offset - offset_in_r0;
7697 if (pre_dec != NULL_RTX)
7699 if (! sp_in_r0)
7701 emit_move_insn (r0,
7702 gen_rtx_PLUS
7703 (Pmode, r0, stack_pointer_rtx));
7704 sp_in_r0 = 1;
7707 offset -= GET_MODE_SIZE (mode);
7708 offset_in_r0 -= GET_MODE_SIZE (mode);
7710 mem_rtx = pre_dec;
7712 else if (sp_in_r0)
7713 mem_rtx = gen_frame_mem (mode, r0);
7714 else
7715 mem_rtx = gen_frame_mem (mode,
7716 gen_rtx_PLUS (Pmode,
7717 stack_pointer_rtx,
7718 r0));
7720 /* We must not use an r0-based address for target-branch
7721 registers or for special registers without pre-dec
7722 memory addresses, since we store their values in r0
7723 first. */
7724 gcc_assert (!TARGET_REGISTER_P (reg)
7725 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7726 || mem_rtx == pre_dec));
7728 addr_ok:
7729 orig_reg_rtx = reg_rtx;
7730 if (TARGET_REGISTER_P (reg)
7731 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7732 && mem_rtx != pre_dec))
7734 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7736 emit_move_insn (tmp_reg, reg_rtx);
7738 if (REGNO (tmp_reg) == R0_REG)
7740 offset_in_r0 = -1;
7741 sp_in_r0 = 0;
7742 gcc_assert (!refers_to_regno_p
7743 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7746 if (*++tmp_pnt <= 0)
7747 tmp_pnt = schedule.temps;
7749 reg_rtx = tmp_reg;
7752 rtx insn;
7754 /* Mark as interesting for dwarf cfi generator */
7755 insn = emit_move_insn (mem_rtx, reg_rtx);
7756 RTX_FRAME_RELATED_P (insn) = 1;
7757 /* If we use an intermediate register for the save, we can't
7758 describe this exactly in cfi as a copy of the to-be-saved
7759 register into the temporary register and then a store of the
7760 temporary register to the stack, because the temporary register can
7761 have a different natural size than the to-be-saved register.
7762 Thus, we gloss over the intermediate copy and pretend we do
7763 a direct save from the to-be-saved register. */
7764 if (REGNO (reg_rtx) != reg)
7766 rtx set;
7768 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7769 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7772 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7774 rtx reg_rtx = gen_rtx_REG (mode, reg);
7775 rtx set;
7776 rtx mem_rtx = gen_frame_mem (mode,
7777 gen_rtx_PLUS (Pmode,
7778 stack_pointer_rtx,
7779 GEN_INT (offset)));
7781 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7782 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7787 gcc_assert (entry->offset == d_rounding);
7789 else
7791 push_regs (&live_regs_mask, current_function_interrupt);
7792 stack_usage += d;
7795 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7796 emit_insn (gen_GOTaddr2picreg ());
7798 if (SHMEDIA_REGS_STACK_ADJUST ())
7800 /* This must NOT go through the PLT, otherwise mach and macl
7801 may be clobbered. */
7802 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7803 (TARGET_FPU_ANY
7804 ? "__GCC_push_shmedia_regs"
7805 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7806 emit_insn (gen_shmedia_save_restore_regs_compact
7807 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7810 if (target_flags != save_flags && ! current_function_interrupt)
7811 emit_insn (gen_toggle_sz ());
7813 target_flags = save_flags;
7815 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7816 stack_pointer_rtx, 0, NULL, true);
7817 stack_usage += rounded_frame_size (d) - d_rounding;
7819 if (frame_pointer_needed)
7820 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7822 if (TARGET_SHCOMPACT
7823 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7825 /* This must NOT go through the PLT, otherwise mach and macl
7826 may be clobbered. */
7827 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7828 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7829 emit_insn (gen_shcompact_incoming_args ());
7832 /* If we are profiling, make sure no instructions are scheduled before
7833 the call to mcount. Similarly if some call instructions are swapped
7834 before frame related insns, it'll confuse the unwinder because
7835 currently SH has no unwind info for function epilogues. */
7836 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7837 emit_insn (gen_blockage ());
7839 if (flag_stack_usage_info)
7840 current_function_static_stack_size = stack_usage;
7843 /* Expand code for the function epilogue. */
7844 void
7845 sh_expand_epilogue (bool sibcall_p)
7847 HARD_REG_SET live_regs_mask;
7848 int d, i;
7849 int d_rounding = 0;
7851 int save_flags = target_flags;
7852 int frame_size, save_size;
7853 int fpscr_deferred = 0;
7854 int e = sibcall_p ? -1 : 1;
7856 d = calc_live_regs (&live_regs_mask);
7858 save_size = d;
7859 frame_size = rounded_frame_size (d);
7861 if (TARGET_SH5)
7863 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7864 int total_size;
7865 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7866 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7867 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7869 total_size = d + tregs_space;
7870 total_size += rounded_frame_size (total_size);
7871 save_size = total_size - frame_size;
7873 /* If adjusting the stack in a single step costs nothing extra, do so.
7874 I.e. either if a single addi is enough, or we need a movi anyway,
7875 and we don't exceed the maximum offset range (the test for the
7876 latter is conservative for simplicity). */
7877 if (TARGET_SHMEDIA
7878 && ! frame_pointer_needed
7879 && (CONST_OK_FOR_I10 (total_size)
7880 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7881 && total_size <= 2044)))
7882 d_rounding = frame_size;
7884 frame_size -= d_rounding;
7887 if (frame_pointer_needed)
7889 /* We must avoid scheduling the epilogue with previous basic blocks.
7890 See PR/18032 and PR/40313. */
7891 emit_insn (gen_blockage ());
7892 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7893 &live_regs_mask, true);
7895 /* We must avoid moving the stack pointer adjustment past code
7896 which reads from the local frame, else an interrupt could
7897 occur after the SP adjustment and clobber data in the local
7898 frame. */
7899 emit_insn (gen_blockage ());
7900 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7902 else if (frame_size)
7904 /* We must avoid moving the stack pointer adjustment past code
7905 which reads from the local frame, else an interrupt could
7906 occur after the SP adjustment and clobber data in the local
7907 frame. */
7908 emit_insn (gen_blockage ());
7909 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7910 &live_regs_mask, true);
7913 if (SHMEDIA_REGS_STACK_ADJUST ())
7915 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7916 (TARGET_FPU_ANY
7917 ? "__GCC_pop_shmedia_regs"
7918 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7919 /* This must NOT go through the PLT, otherwise mach and macl
7920 may be clobbered. */
7921 emit_insn (gen_shmedia_save_restore_regs_compact
7922 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7925 /* Pop all the registers. */
7927 if (target_flags != save_flags && ! current_function_interrupt)
7928 emit_insn (gen_toggle_sz ());
7929 if (TARGET_SH5)
7931 int offset_base, offset;
7932 int offset_in_r0 = -1;
7933 int sp_in_r0 = 0;
7934 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7935 save_schedule schedule;
7936 save_entry *entry;
7937 int *tmp_pnt;
7939 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7940 offset_base = -entry[1].offset + d_rounding;
7941 tmp_pnt = schedule.temps;
7942 for (; entry->mode != VOIDmode; entry--)
7944 enum machine_mode mode = (enum machine_mode) entry->mode;
7945 int reg = entry->reg;
7946 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7948 offset = offset_base + entry->offset;
7949 reg_rtx = gen_rtx_REG (mode, reg);
7951 mem_rtx = gen_frame_mem (mode,
7952 gen_rtx_PLUS (Pmode,
7953 stack_pointer_rtx,
7954 GEN_INT (offset)));
7956 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7957 mem_rtx = NULL_RTX;
7959 if (HAVE_POST_INCREMENT
7960 && (offset == offset_in_r0
7961 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7962 && mem_rtx == NULL_RTX)
7963 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7965 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7967 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7968 post_inc = NULL_RTX;
7969 else
7970 mem_rtx = NULL_RTX;
7973 if (mem_rtx != NULL_RTX)
7974 goto addr_ok;
7976 if (offset_in_r0 == -1)
7978 emit_move_insn (r0, GEN_INT (offset));
7979 offset_in_r0 = offset;
7981 else if (offset != offset_in_r0)
7983 emit_move_insn (r0,
7984 gen_rtx_PLUS
7985 (Pmode, r0,
7986 GEN_INT (offset - offset_in_r0)));
7987 offset_in_r0 += offset - offset_in_r0;
7990 if (post_inc != NULL_RTX)
7992 if (! sp_in_r0)
7994 emit_move_insn (r0,
7995 gen_rtx_PLUS
7996 (Pmode, r0, stack_pointer_rtx));
7997 sp_in_r0 = 1;
8000 mem_rtx = post_inc;
8002 offset_in_r0 += GET_MODE_SIZE (mode);
8004 else if (sp_in_r0)
8005 mem_rtx = gen_frame_mem (mode, r0);
8006 else
8007 mem_rtx = gen_frame_mem (mode,
8008 gen_rtx_PLUS (Pmode,
8009 stack_pointer_rtx,
8010 r0));
8012 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8013 || mem_rtx == post_inc);
8015 addr_ok:
8016 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8017 && mem_rtx != post_inc)
8019 emit_move_insn (r0, mem_rtx);
8020 mem_rtx = r0;
8022 else if (TARGET_REGISTER_P (reg))
8024 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8026 /* Give the scheduler a bit of freedom by using up to
8027 MAX_TEMPS registers in a round-robin fashion. */
8028 emit_move_insn (tmp_reg, mem_rtx);
8029 mem_rtx = tmp_reg;
8030 if (*++tmp_pnt < 0)
8031 tmp_pnt = schedule.temps;
8034 emit_move_insn (reg_rtx, mem_rtx);
8037 gcc_assert (entry->offset + offset_base == d + d_rounding);
8039 else /* ! TARGET_SH5 */
8041 int last_reg;
8043 save_size = 0;
8044 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
8045 register. */
8046 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8047 && !sh_cfun_resbank_handler_p ())
8049 if (!frame_pointer_needed)
8050 emit_insn (gen_blockage ());
8051 pop (PR_REG);
8054 /* Banked registers are popped first to avoid being scheduled in the
8055 delay slot. RTE switches banks before the ds instruction. */
8056 if (current_function_interrupt)
8058 bool use_movml = false;
8060 if (TARGET_SH2A)
8062 unsigned int count = 0;
8064 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8065 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8066 count++;
8067 else
8068 break;
8070 /* Use movml when all banked registers are popped. */
8071 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8072 use_movml = true;
8075 if (sh_cfun_resbank_handler_p ())
8076 ; /* Do nothing. */
8077 else if (use_movml)
8079 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8081 /* We must avoid scheduling the multiple-register load insn with
8082 other insns. */
8083 emit_insn (gen_blockage ());
8084 emit_insn (gen_movml_pop_banked (sp_reg));
8085 emit_insn (gen_blockage ());
8087 else
8088 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8089 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8090 pop (i);
8092 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8094 else
8095 last_reg = FIRST_PSEUDO_REGISTER;
8097 for (i = 0; i < last_reg; i++)
8099 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8101 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8102 && hard_reg_set_intersect_p (live_regs_mask,
8103 reg_class_contents[DF_REGS]))
8104 fpscr_deferred = 1;
8105 /* For an ISR with the RESBANK attribute assigned, don't pop the
8106 following registers: R0-R14, MACH, MACL and GBR. */
8107 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8108 && ! (sh_cfun_resbank_handler_p ()
8109 && ((j >= FIRST_GENERAL_REG
8110 && j < LAST_GENERAL_REG)
8111 || j == MACH_REG
8112 || j == MACL_REG
8113 || j == GBR_REG)))
8114 pop (j);
8116 if (j == FIRST_FP_REG && fpscr_deferred)
8117 pop (FPSCR_REG);
8120 if (target_flags != save_flags && ! current_function_interrupt)
8121 emit_insn (gen_toggle_sz ());
8122 target_flags = save_flags;
8124 output_stack_adjust (crtl->args.pretend_args_size
8125 + save_size + d_rounding
8126 + crtl->args.info.stack_regs * 8,
8127 stack_pointer_rtx, e, NULL, true);
8129 if (crtl->calls_eh_return)
8130 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8131 EH_RETURN_STACKADJ_RTX));
8133 /* Switch back to the normal stack if necessary. */
8134 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8135 emit_insn (gen_sp_switch_2 ());
8137 /* Tell flow the insn that pops PR isn't dead. */
8138 /* PR_REG will never be live in SHmedia mode, and we don't need to
8139 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8140 by the return pattern. */
8141 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8142 emit_use (gen_rtx_REG (SImode, PR_REG));
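/* For reference (illustrative only; the handler name is made up): the
   RESBANK handling above reacts to SH2A source of roughly this shape.
   With the resbank attribute the register bank takes care of R0-R14,
   MACH, MACL and GBR, so the epilogue above does not pop them, nor PR:

     void __attribute__ ((interrupt_handler, resbank))
     my_bank_isr (void)
     {
     }
*/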
8145 /* Emit code to change the current function's return address to RA.
8146 TEMP is available as a scratch register, if needed. */
8147 void
8148 sh_set_return_address (rtx ra, rtx tmp)
8150 HARD_REG_SET live_regs_mask;
8151 int d;
8152 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8153 int pr_offset;
8155 d = calc_live_regs (&live_regs_mask);
8157 /* If pr_reg isn't live, we can set it (or the register given in
8158 sh_media_register_for_return) directly. */
8159 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8161 rtx rr;
8163 if (TARGET_SHMEDIA)
8165 int rr_regno = sh_media_register_for_return ();
8167 if (rr_regno < 0)
8168 rr_regno = pr_reg;
8170 rr = gen_rtx_REG (DImode, rr_regno);
8172 else
8173 rr = gen_rtx_REG (SImode, pr_reg);
8175 emit_insn (GEN_MOV (rr, ra));
8176 /* Tell flow the register for return isn't dead. */
8177 emit_use (rr);
8178 return;
8181 if (TARGET_SH5)
8183 int offset;
8184 save_schedule schedule;
8185 save_entry *entry;
8187 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8188 offset = entry[1].offset;
8189 for (; entry->mode != VOIDmode; entry--)
8190 if (entry->reg == pr_reg)
8191 goto found;
8193 /* We can't find the PR register. */
8194 gcc_unreachable ();
8196 found:
8197 offset = entry->offset - offset;
8198 pr_offset = (rounded_frame_size (d) + offset
8199 + SHMEDIA_REGS_STACK_ADJUST ());
8201 else
8202 pr_offset = rounded_frame_size (d);
8204 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8206 if (frame_pointer_needed)
8207 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8208 else
8209 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8211 tmp = gen_frame_mem (Pmode, tmp);
8212 emit_insn (GEN_MOV (tmp, ra));
8213 /* Tell flow this store isn't dead. */
8214 emit_use (tmp);
8217 /* Clear variables at function end. */
8218 static void
8219 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8220 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8224 static rtx
8225 sh_builtin_saveregs (void)
8227 /* First unnamed integer register. */
8228 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8229 /* Number of integer registers we need to save. */
8230 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8231 /* First unnamed SFmode float reg */
8232 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8233 /* Number of SFmode float regs to save. */
8234 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8235 rtx regbuf, fpregs;
8236 int bufsize, regno;
8237 alias_set_type alias_set;
8239 if (TARGET_SH5)
8241 if (n_intregs)
8243 int pushregs = n_intregs;
8245 while (pushregs < NPARM_REGS (SImode) - 1
8246 && (CALL_COOKIE_INT_REG_GET
8247 (crtl->args.info.call_cookie,
8248 NPARM_REGS (SImode) - pushregs)
8249 == 1))
8251 crtl->args.info.call_cookie
8252 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8253 - pushregs, 1);
8254 pushregs++;
8257 if (pushregs == NPARM_REGS (SImode))
8258 crtl->args.info.call_cookie
8259 |= (CALL_COOKIE_INT_REG (0, 1)
8260 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8261 else
8262 crtl->args.info.call_cookie
8263 |= CALL_COOKIE_STACKSEQ (pushregs);
8265 crtl->args.pretend_args_size += 8 * n_intregs;
8267 if (TARGET_SHCOMPACT)
8268 return const0_rtx;
8271 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8273 error ("__builtin_saveregs not supported by this subtarget");
8274 return const0_rtx;
8277 if (TARGET_SHMEDIA)
8278 n_floatregs = 0;
8280 /* Allocate block of memory for the regs. */
8281 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8282 Or can assign_stack_local accept a 0 SIZE argument? */
8283 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8285 if (TARGET_SHMEDIA)
8286 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8287 else if (n_floatregs & 1)
8289 rtx addr;
8291 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8292 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8293 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8294 regbuf = change_address (regbuf, BLKmode, addr);
8296 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8298 rtx addr, mask;
8300 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8301 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8302 XEXP (regbuf, 0), 4));
8303 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8304 emit_insn (gen_andsi3 (addr, addr, mask));
8305 regbuf = change_address (regbuf, BLKmode, addr);
8307 else
8308 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8309 alias_set = get_varargs_alias_set ();
8310 set_mem_alias_set (regbuf, alias_set);
8312 /* Save int args.
8313 This is optimized to only save the regs that are necessary. Explicitly
8314 named args need not be saved. */
8315 if (n_intregs > 0)
8316 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8317 adjust_address (regbuf, BLKmode,
8318 n_floatregs * UNITS_PER_WORD),
8319 n_intregs);
8321 if (TARGET_SHMEDIA)
8322 /* Return the address of the regbuf. */
8323 return XEXP (regbuf, 0);
8325 /* Save float args.
8326 This is optimized to only save the regs that are necessary. Explicitly
8327 named args need not be saved.
8328 We explicitly build a pointer to the buffer because it halves the insn
8329 count when not optimizing (otherwise the pointer is built for each reg
8330 saved).
8331 We emit the moves in reverse order so that we can use predecrement. */
8333 fpregs = copy_to_mode_reg (Pmode,
8334 plus_constant (Pmode, XEXP (regbuf, 0),
8335 n_floatregs * UNITS_PER_WORD));
8336 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8338 rtx mem;
8339 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8341 emit_insn (gen_addsi3 (fpregs, fpregs,
8342 GEN_INT (-2 * UNITS_PER_WORD)));
8343 mem = change_address (regbuf, DFmode, fpregs);
8344 emit_move_insn (mem,
8345 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8347 regno = first_floatreg;
8348 if (regno & 1)
8350 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8351 mem = change_address (regbuf, SFmode, fpregs);
8352 emit_move_insn (mem,
8353 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8354 + regno - SH_REG_MSW_OFFSET));
8357 else
8358 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8360 rtx mem;
8362 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8363 mem = change_address (regbuf, SFmode, fpregs);
8364 emit_move_insn (mem,
8365 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8368 /* Return the address of the regbuf. */
8369 return XEXP (regbuf, 0);
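/* Rough layout of the buffer built above (a sketch; the exact ordering
   of the FP slots depends on the SH4 pairing loop):

     regbuf + 0                            : n_floatregs words of saved
                                             unnamed FP argument registers
     regbuf + n_floatregs * UNITS_PER_WORD : n_intregs words of saved
                                             unnamed integer argument regs

   sh_va_start below points __va_next_fp at the start of this buffer and
   __va_next_o at the integer part.  */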
8372 /* Define the `__builtin_va_list' type for the ABI. */
8373 static tree
8374 sh_build_builtin_va_list (void)
8376 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8377 tree record, type_decl;
8379 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8380 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8381 return ptr_type_node;
8383 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8384 type_decl = build_decl (BUILTINS_LOCATION,
8385 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8387 f_next_o = build_decl (BUILTINS_LOCATION,
8388 FIELD_DECL, get_identifier ("__va_next_o"),
8389 ptr_type_node);
8390 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8391 FIELD_DECL,
8392 get_identifier ("__va_next_o_limit"),
8393 ptr_type_node);
8394 f_next_fp = build_decl (BUILTINS_LOCATION,
8395 FIELD_DECL, get_identifier ("__va_next_fp"),
8396 ptr_type_node);
8397 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8398 FIELD_DECL,
8399 get_identifier ("__va_next_fp_limit"),
8400 ptr_type_node);
8401 f_next_stack = build_decl (BUILTINS_LOCATION,
8402 FIELD_DECL, get_identifier ("__va_next_stack"),
8403 ptr_type_node);
8405 DECL_FIELD_CONTEXT (f_next_o) = record;
8406 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8407 DECL_FIELD_CONTEXT (f_next_fp) = record;
8408 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8409 DECL_FIELD_CONTEXT (f_next_stack) = record;
8411 TYPE_STUB_DECL (record) = type_decl;
8412 TYPE_NAME (record) = type_decl;
8413 TYPE_FIELDS (record) = f_next_o;
8414 DECL_CHAIN (f_next_o) = f_next_o_limit;
8415 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8416 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8417 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8419 layout_type (record);
8421 return record;
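/* Illustrative only: the record built above corresponds roughly to the
   following C structure; the field order matters to sh_va_start and
   sh_gimplify_va_arg_expr below.  __va_next_o walks the saved integer
   argument registers up to __va_next_o_limit, __va_next_fp walks the
   saved FP argument registers up to __va_next_fp_limit, and
   __va_next_stack walks arguments passed on the stack:

     typedef struct __va_list_tag
     {
       void *__va_next_o;
       void *__va_next_o_limit;
       void *__va_next_fp;
       void *__va_next_fp_limit;
       void *__va_next_stack;
     } __va_list_tag;
*/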
8424 /* Implement `va_start' for varargs and stdarg. */
8425 static void
8426 sh_va_start (tree valist, rtx nextarg)
8428 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8429 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8430 tree t, u;
8431 int nfp, nint;
8433 if (TARGET_SH5)
8435 expand_builtin_saveregs ();
8436 std_expand_builtin_va_start (valist, nextarg);
8437 return;
8440 if ((! TARGET_SH2E && ! TARGET_SH4)
8441 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8443 std_expand_builtin_va_start (valist, nextarg);
8444 return;
8447 f_next_o = TYPE_FIELDS (va_list_type_node);
8448 f_next_o_limit = DECL_CHAIN (f_next_o);
8449 f_next_fp = DECL_CHAIN (f_next_o_limit);
8450 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8451 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8453 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8454 NULL_TREE);
8455 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8456 valist, f_next_o_limit, NULL_TREE);
8457 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8458 NULL_TREE);
8459 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8460 valist, f_next_fp_limit, NULL_TREE);
8461 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8462 valist, f_next_stack, NULL_TREE);
8464 /* Call __builtin_saveregs. */
8465 u = make_tree (sizetype, expand_builtin_saveregs ());
8466 u = fold_convert (ptr_type_node, u);
8467 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8468 TREE_SIDE_EFFECTS (t) = 1;
8469 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8471 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8472 if (nfp < 8)
8473 nfp = 8 - nfp;
8474 else
8475 nfp = 0;
8476 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8477 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8478 TREE_SIDE_EFFECTS (t) = 1;
8479 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8481 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8482 TREE_SIDE_EFFECTS (t) = 1;
8483 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8485 nint = crtl->args.info.arg_count[SH_ARG_INT];
8486 if (nint < 4)
8487 nint = 4 - nint;
8488 else
8489 nint = 0;
8490 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8491 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8492 TREE_SIDE_EFFECTS (t) = 1;
8493 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8495 u = make_tree (ptr_type_node, nextarg);
8496 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8497 TREE_SIDE_EFFECTS (t) = 1;
8498 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
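/* Worked example (illustrative, assuming UNITS_PER_WORD == 4 and the
   default SH4 ABI): for a function like  int f (int a, float b, ...)
   we have arg_count[SH_ARG_INT] == 1 and arg_count[SH_ARG_FLOAT] == 1,
   so the assignments above amount to

     next_fp       = <address returned by __builtin_saveregs>
     next_fp_limit = next_fp + (8 - 1) * 4
     next_o        = next_fp_limit
     next_o_limit  = next_o + (4 - 1) * 4
     next_stack    = nextarg  */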
8501 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8502 member, return it. */
8503 static tree
8504 find_sole_member (tree type)
8506 tree field, member = NULL_TREE;
8508 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8510 if (TREE_CODE (field) != FIELD_DECL)
8511 continue;
8512 if (!DECL_SIZE (field))
8513 return NULL_TREE;
8514 if (integer_zerop (DECL_SIZE (field)))
8515 continue;
8516 if (member)
8517 return NULL_TREE;
8518 member = field;
8520 return member;
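/* Illustrative examples (hypothetical types):

     struct A { double d; };              sole member is d
     struct B { int pad[0]; double d; };  pad has zero size, so d again
     struct C { float x; float y; };      two members -> NULL_TREE

   sh_gimplify_va_arg_expr below uses this so that, for instance,
   struct A is fetched from the va_list the same way a plain double
   would be.  */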
8523 /* Implement `va_arg'. */
8524 static tree
8525 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8526 gimple_seq *post_p ATTRIBUTE_UNUSED)
8528 HOST_WIDE_INT size, rsize;
8529 tree tmp, pptr_type_node;
8530 tree addr, lab_over = NULL, result = NULL;
8531 bool pass_by_ref;
8532 tree eff_type;
8534 if (!VOID_TYPE_P (type))
8535 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8536 else
8537 pass_by_ref = false;
8539 if (pass_by_ref)
8540 type = build_pointer_type (type);
8542 size = int_size_in_bytes (type);
8543 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8544 pptr_type_node = build_pointer_type (ptr_type_node);
8546 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8547 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8549 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8550 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8551 int pass_as_float;
8552 tree lab_false;
8553 tree member;
8555 f_next_o = TYPE_FIELDS (va_list_type_node);
8556 f_next_o_limit = DECL_CHAIN (f_next_o);
8557 f_next_fp = DECL_CHAIN (f_next_o_limit);
8558 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8559 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8561 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8562 NULL_TREE);
8563 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8564 valist, f_next_o_limit, NULL_TREE);
8565 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8566 valist, f_next_fp, NULL_TREE);
8567 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8568 valist, f_next_fp_limit, NULL_TREE);
8569 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8570 valist, f_next_stack, NULL_TREE);
8572 /* Structures with a single member with a distinct mode are passed
8573 like their member. This is relevant if the latter has a REAL_TYPE
8574 or COMPLEX_TYPE type. */
8575 eff_type = type;
8576 while (TREE_CODE (eff_type) == RECORD_TYPE
8577 && (member = find_sole_member (eff_type))
8578 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8579 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8580 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8582 tree field_type = TREE_TYPE (member);
8584 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8585 eff_type = field_type;
8586 else
8588 gcc_assert ((TYPE_ALIGN (eff_type)
8589 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8590 || (TYPE_ALIGN (eff_type)
8591 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8592 break;
8596 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8598 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8599 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8600 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8601 && size <= 16));
8603 else
8605 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8608 addr = create_tmp_var (pptr_type_node, NULL);
8609 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8610 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8612 valist = build_simple_mem_ref (addr);
8614 if (pass_as_float)
8616 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8617 tree cmp;
8618 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8620 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8621 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8623 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8624 tmp = next_fp_limit;
8625 if (size > 4 && !is_double)
8626 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8627 tmp = build2 (GE_EXPR, boolean_type_node,
8628 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8629 cmp = build3 (COND_EXPR, void_type_node, tmp,
8630 build1 (GOTO_EXPR, void_type_node,
8631 unshare_expr (lab_false)), NULL_TREE);
8632 if (!is_double)
8633 gimplify_and_add (cmp, pre_p);
8635 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8636 || (is_double || size == 16))
8638 tmp = fold_convert (sizetype, next_fp_tmp);
8639 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8640 size_int (UNITS_PER_WORD));
8641 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8642 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8644 if (is_double)
8645 gimplify_and_add (cmp, pre_p);
8647 #ifdef FUNCTION_ARG_SCmode_WART
8648 if (TYPE_MODE (eff_type) == SCmode
8649 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8651 tree subtype = TREE_TYPE (eff_type);
8652 tree real, imag;
8654 imag
8655 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8656 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8658 real
8659 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8660 real = get_initialized_tmp_var (real, pre_p, NULL);
8662 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8663 if (type != eff_type)
8664 result = build1 (VIEW_CONVERT_EXPR, type, result);
8665 result = get_initialized_tmp_var (result, pre_p, NULL);
8667 #endif /* FUNCTION_ARG_SCmode_WART */
8669 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8670 gimplify_and_add (tmp, pre_p);
8672 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8673 gimplify_and_add (tmp, pre_p);
8675 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8676 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8677 gimplify_assign (unshare_expr (next_fp_tmp),
8678 unshare_expr (valist), pre_p);
8680 gimplify_assign (unshare_expr (valist),
8681 unshare_expr (next_fp_tmp), post_p);
8682 valist = next_fp_tmp;
8684 else
8686 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8687 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8688 unshare_expr (next_o_limit));
8689 tmp = build3 (COND_EXPR, void_type_node, tmp,
8690 build1 (GOTO_EXPR, void_type_node,
8691 unshare_expr (lab_false)),
8692 NULL_TREE);
8693 gimplify_and_add (tmp, pre_p);
8695 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8696 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8698 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8699 gimplify_and_add (tmp, pre_p);
8701 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8702 gimplify_and_add (tmp, pre_p);
8704 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8705 gimplify_assign (unshare_expr (next_o),
8706 unshare_expr (next_o_limit), pre_p);
8708 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8709 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8712 if (!result)
8714 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8715 gimplify_and_add (tmp, pre_p);
8719 /* ??? In va-sh.h, there had been code to make values larger than
8720 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8722 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8723 if (result)
8725 gimplify_assign (result, tmp, pre_p);
8726 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8727 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8728 gimplify_and_add (tmp, pre_p);
8730 else
8731 result = tmp;
8733 if (pass_by_ref)
8734 result = build_va_arg_indirect_ref (result);
8736 return result;
8739 /* 64-bit floating point memory transfers are paired single precision loads
8740 or stores. So DWARF information needs fixing in little endian (unless
8741 PR=SZ=1 in FPSCR). */
8743 sh_dwarf_register_span (rtx reg)
8745 unsigned regno = REGNO (reg);
8747 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8748 return NULL_RTX;
8750 return
8751 gen_rtx_PARALLEL (VOIDmode,
8752 gen_rtvec (2,
8753 gen_rtx_REG (SFmode, regno + 1),
8754 gen_rtx_REG (SFmode, regno)));
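/* For example (illustrative): on a little endian SH4, a DFmode value
   living in the register pair starting at hard register REGNO is
   described to DWARF as

     (parallel [(reg:SF REGNO+1) (reg:SF REGNO)])

   i.e. the two single precision halves listed in reversed order.  */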
8757 static enum machine_mode
8758 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8759 int *punsignedp, const_tree funtype,
8760 int for_return)
8762 if (sh_promote_prototypes (funtype))
8763 return promote_mode (type, mode, punsignedp);
8764 else
8765 return default_promote_function_mode (type, mode, punsignedp, funtype,
8766 for_return);
8769 static bool
8770 sh_promote_prototypes (const_tree type)
8772 if (TARGET_HITACHI)
8773 return false;
8774 if (! type)
8775 return true;
8776 return ! sh_attr_renesas_p (type);
8779 /* Whether an argument must be passed by reference. On SHcompact, we
8780 pretend arguments wider than 32-bits that would have been passed in
8781 registers are passed by reference, so that an SHmedia trampoline
8782 loads them into the full 64-bit registers. */
8783 static int
8784 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8785 const_tree type, bool named)
8787 unsigned HOST_WIDE_INT size;
8789 if (type)
8790 size = int_size_in_bytes (type);
8791 else
8792 size = GET_MODE_SIZE (mode);
8794 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8795 && (!named
8796 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8797 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8798 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8799 && size > 4
8800 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8801 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8802 return size;
8803 else
8804 return 0;
8807 static bool
8808 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8809 const_tree type, bool named)
8811 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8813 if (targetm.calls.must_pass_in_stack (mode, type))
8814 return true;
8816 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8817 wants to know about pass-by-reference semantics for incoming
8818 arguments. */
8819 if (! cum)
8820 return false;
8822 if (TARGET_SHCOMPACT)
8824 cum->byref = shcompact_byref (cum, mode, type, named);
8825 return cum->byref != 0;
8828 return false;
8831 static bool
8832 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8833 const_tree type, bool named ATTRIBUTE_UNUSED)
8835 /* ??? How can it possibly be correct to return true only on the
8836 caller side of the equation? Is there someplace else in the
8837 sh backend that's magically producing the copies? */
8838 return (get_cumulative_args (cum)->outgoing
8839 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8840 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8843 /* Round a register number up to a proper boundary for an arg of mode
8844 MODE.
8845 The SH doesn't care about double alignment, so we only
8846 round doubles to even regs when explicitly asked to. */
8847 static int
8848 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8850 /* FIXME: This used to be a macro and has been copy pasted into this
8851 function as is. Make this more readable. */
8852 return
8853 (((TARGET_ALIGN_DOUBLE
8854 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
8855 && (mode == DFmode || mode == DCmode)
8856 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
8857 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
8858 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
8859 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
8860 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
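/* Worked example (illustrative): on SH4, for a DFmode argument when
   three SFmode argument registers have already been used,
   cum.arg_count[SH_ARG_FLOAT] == 3 and the first branch applies, so
   the result is 3 + (3 & 1) == 4: the double starts at an even
   register and one single precision register is left unused (it may
   later be reused via free_single_fp_reg; see
   sh_function_arg_advance below).  */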
8863 /* Return true if arg of the specified mode should be passed in a register
8864 or false otherwise. */
8865 static bool
8866 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
8867 const_tree type)
8869 /* FIXME: This used to be a macro and has been copy pasted into this
8870 function as is. Make this more readable. */
8871 return
8872 ((type == 0
8873 || (! TREE_ADDRESSABLE (type)
8874 && (! (TARGET_HITACHI || cum.renesas_abi)
8875 || ! (AGGREGATE_TYPE_P (type)
8876 || (!TARGET_FPU_ANY
8877 && (GET_MODE_CLASS (mode) == MODE_FLOAT
8878 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
8879 && ! cum.force_mem
8880 && (TARGET_SH2E
8881 ? ((mode) == BLKmode
8882 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
8883 + int_size_in_bytes (type))
8884 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
8885 : ((sh_round_reg (cum, mode)
8886 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
8887 <= NPARM_REGS (mode)))
8888 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
8891 static int
8892 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8893 tree type, bool named ATTRIBUTE_UNUSED)
8895 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8896 int words = 0;
8898 if (!TARGET_SH5
8899 && sh_pass_in_reg_p (*cum, mode, type)
8900 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8901 && (sh_round_reg (*cum, mode)
8902 + (mode != BLKmode
8903 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8904 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8905 > NPARM_REGS (mode)))
8906 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8908 else if (!TARGET_SHCOMPACT
8909 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8910 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8912 return words * UNITS_PER_WORD;
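/* Worked example (illustrative, assuming a plain SH2-like configuration
   without hardware double support or double alignment): for a DImode
   argument when three of the four integer argument registers are
   already in use, sh_round_reg (*cum, DImode) == 3 and
   3 + CEIL (8, 4) == 5 exceeds the four available registers, so
   words == 4 - 3 == 1 and 4 bytes are passed in the last register
   while the remaining 4 bytes go on the stack.  */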
8916 /* Define where to put the arguments to a function.
8917 Value is zero to push the argument on the stack,
8918 or a hard register in which to store the argument.
8920 MODE is the argument's machine mode.
8921 TYPE is the data type of the argument (as a tree).
8922 This is null for libcalls where that information may
8923 not be available.
8924 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8925 the preceding args and about the function being called.
8926 NAMED is nonzero if this argument is a named parameter
8927 (otherwise it is an extra parameter matching an ellipsis).
8929 On SH the first args are normally in registers
8930 and the rest are pushed. Any arg that starts within the first
8931 NPARM_REGS words is at least partially passed in a register unless
8932 its data type forbids. */
8933 static rtx
8934 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8935 const_tree type, bool named)
8937 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8939 if (! TARGET_SH5 && mode == VOIDmode)
8940 return GEN_INT (ca->renesas_abi ? 1 : 0);
8942 if (! TARGET_SH5
8943 && sh_pass_in_reg_p (*ca, mode, type)
8944 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8946 int regno;
8948 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8949 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8951 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8952 gen_rtx_REG (SFmode,
8953 BASE_ARG_REG (mode)
8954 + (sh_round_reg (*ca, mode) ^ 1)),
8955 const0_rtx);
8956 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8957 gen_rtx_REG (SFmode,
8958 BASE_ARG_REG (mode)
8959 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8960 GEN_INT (4));
8961 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8964 /* If the alignment of a DF value causes an SF register to be
8965 skipped, we will use that skipped register for the next SF
8966 value. */
8967 if ((TARGET_HITACHI || ca->renesas_abi)
8968 && ca->free_single_fp_reg
8969 && mode == SFmode)
8970 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8972 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8973 ^ (mode == SFmode && TARGET_SH4
8974 && TARGET_LITTLE_ENDIAN
8975 && ! TARGET_HITACHI && ! ca->renesas_abi);
8976 return gen_rtx_REG (mode, regno);
8980 if (TARGET_SH5)
8982 if (mode == VOIDmode && TARGET_SHCOMPACT)
8983 return GEN_INT (ca->call_cookie);
8985 /* The following test assumes unnamed arguments are promoted to
8986 DFmode. */
8987 if (mode == SFmode && ca->free_single_fp_reg)
8988 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8990 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8991 && (named || ! ca->prototype_p)
8992 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8994 if (! ca->prototype_p && TARGET_SHMEDIA)
8995 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8997 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8998 FIRST_FP_PARM_REG
8999 + ca->arg_count[(int) SH_ARG_FLOAT]);
9002 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9003 && (! TARGET_SHCOMPACT
9004 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9005 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9006 type, named))))
9008 return gen_rtx_REG (mode, (FIRST_PARM_REG
9009 + ca->arg_count[(int) SH_ARG_INT]));
9012 return NULL_RTX;
9015 return NULL_RTX;
9018 /* Update the data in CUM to advance over an argument
9019 of mode MODE and data type TYPE.
9020 (TYPE is null for libcalls where that information may not be
9021 available.) */
9022 static void
9023 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
9024 const_tree type, bool named)
9026 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9028 if (ca->force_mem)
9029 ca->force_mem = 0;
9030 else if (TARGET_SH5)
9032 const_tree type2 = (ca->byref && type
9033 ? TREE_TYPE (type)
9034 : type);
9035 enum machine_mode mode2 = (ca->byref && type
9036 ? TYPE_MODE (type2)
9037 : mode);
9038 int dwords = ((ca->byref
9039 ? ca->byref
9040 : mode2 == BLKmode
9041 ? int_size_in_bytes (type2)
9042 : GET_MODE_SIZE (mode2)) + 7) / 8;
9043 int numregs = MIN (dwords, NPARM_REGS (SImode)
9044 - ca->arg_count[(int) SH_ARG_INT]);
9046 if (numregs)
9048 ca->arg_count[(int) SH_ARG_INT] += numregs;
9049 if (TARGET_SHCOMPACT
9050 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9052 ca->call_cookie
9053 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9054 - numregs, 1);
9055 /* N.B. We want this also for outgoing. */
9056 ca->stack_regs += numregs;
9058 else if (ca->byref)
9060 if (! ca->outgoing)
9061 ca->stack_regs += numregs;
9062 ca->byref_regs += numregs;
9063 ca->byref = 0;
9065 ca->call_cookie
9066 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9067 - numregs, 2);
9068 while (--numregs);
9069 ca->call_cookie
9070 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9071 - 1, 1);
9073 else if (dwords > numregs)
9075 int pushregs = numregs;
9077 if (TARGET_SHCOMPACT)
9078 ca->stack_regs += numregs;
9079 while (pushregs < NPARM_REGS (SImode) - 1
9080 && (CALL_COOKIE_INT_REG_GET
9081 (ca->call_cookie,
9082 NPARM_REGS (SImode) - pushregs)
9083 == 1))
9085 ca->call_cookie
9086 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9087 - pushregs, 1);
9088 pushregs++;
9090 if (numregs == NPARM_REGS (SImode))
9091 ca->call_cookie
9092 |= CALL_COOKIE_INT_REG (0, 1)
9093 | CALL_COOKIE_STACKSEQ (numregs - 1);
9094 else
9095 ca->call_cookie
9096 |= CALL_COOKIE_STACKSEQ (numregs);
9099 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9100 && (named || ! ca->prototype_p))
9102 if (mode2 == SFmode && ca->free_single_fp_reg)
9103 ca->free_single_fp_reg = 0;
9104 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9105 < NPARM_REGS (SFmode))
9107 int numfpregs
9108 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9109 NPARM_REGS (SFmode)
9110 - ca->arg_count[(int) SH_ARG_FLOAT]);
9112 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9114 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9116 if (ca->outgoing && numregs > 0)
9119 ca->call_cookie
9120 |= (CALL_COOKIE_INT_REG
9121 (ca->arg_count[(int) SH_ARG_INT]
9122 - numregs + ((numfpregs - 2) / 2),
9123 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9124 - numfpregs) / 2));
9126 while (numfpregs -= 2);
9128 else if (mode2 == SFmode && (named)
9129 && (ca->arg_count[(int) SH_ARG_FLOAT]
9130 < NPARM_REGS (SFmode)))
9131 ca->free_single_fp_reg
9132 = FIRST_FP_PARM_REG - numfpregs
9133 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9136 return;
9139 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9141 /* Note that we've used the skipped register. */
9142 if (mode == SFmode && ca->free_single_fp_reg)
9144 ca->free_single_fp_reg = 0;
9145 return;
9147 /* When we have a DF after an SF, there's an SF register that gets
9148 skipped in order to align the DF value. We note this skipped
9149 register, because the next SF value will use it, and not the
9150 SF that follows the DF. */
9151 if (mode == DFmode
9152 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9154 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9155 + BASE_ARG_REG (mode));
9159 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9160 || sh_pass_in_reg_p (*ca, mode, type))
9161 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9162 = (sh_round_reg (*ca, mode)
9163 + (mode == BLKmode
9164 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9165 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9168 /* The Renesas calling convention doesn't quite fit into this scheme since
9169 the address is passed like an invisible argument, but one that is always
9170 passed in memory. */
9171 static rtx
9172 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9174 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9175 return NULL_RTX;
9176 return gen_rtx_REG (Pmode, 2);
9179 /* Worker function for TARGET_FUNCTION_VALUE.
9181 For the SH, this is like LIBCALL_VALUE, except that we must change the
9182 mode like PROMOTE_MODE does.
9183 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9184 tested here has to be kept in sync with the one in
9185 explow.c:promote_mode. */
9186 static rtx
9187 sh_function_value (const_tree valtype,
9188 const_tree fn_decl_or_type,
9189 bool outgoing ATTRIBUTE_UNUSED)
9191 if (fn_decl_or_type
9192 && !DECL_P (fn_decl_or_type))
9193 fn_decl_or_type = NULL;
9195 return gen_rtx_REG (
9196 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9197 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9198 && (TREE_CODE (valtype) == INTEGER_TYPE
9199 || TREE_CODE (valtype) == ENUMERAL_TYPE
9200 || TREE_CODE (valtype) == BOOLEAN_TYPE
9201 || TREE_CODE (valtype) == REAL_TYPE
9202 || TREE_CODE (valtype) == OFFSET_TYPE))
9203 && sh_promote_prototypes (fn_decl_or_type)
9204 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9205 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9208 /* Worker function for TARGET_LIBCALL_VALUE. */
9209 static rtx
9210 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9212 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9215 /* Return true if N is a possible register number of function value. */
9216 static bool
9217 sh_function_value_regno_p (const unsigned int regno)
9219 return ((regno) == FIRST_RET_REG
9220 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9221 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9224 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9225 static bool
9226 sh_return_in_memory (const_tree type, const_tree fndecl)
9228 if (TARGET_SH5)
9230 if (TYPE_MODE (type) == BLKmode)
9231 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9232 else
9233 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9235 else
9237 return (TYPE_MODE (type) == BLKmode
9238 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9239 && TREE_CODE (type) == RECORD_TYPE));
9243 /* We actually emit the code in sh_expand_prologue. We used to use
9244 a static variable to flag that we need to emit this code, but that
9245 doesn't work when inlining, when functions are deferred and then emitted
9246 later. Fortunately, we already have two flags that are part of struct
9247 function that tell if a function uses varargs or stdarg. */
9248 static void
9249 sh_setup_incoming_varargs (cumulative_args_t ca,
9250 enum machine_mode mode,
9251 tree type,
9252 int *pretend_arg_size,
9253 int second_time ATTRIBUTE_UNUSED)
9255 gcc_assert (cfun->stdarg);
9256 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9258 int named_parm_regs, anon_parm_regs;
9260 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9261 + (mode == BLKmode
9262 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9263 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9264 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9265 if (anon_parm_regs > 0)
9266 *pretend_arg_size = anon_parm_regs * 4;
9270 static bool
9271 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9273 return TARGET_SH5;
9276 static bool
9277 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9279 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9281 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9285 /* Define the offset between two registers, one to be eliminated, and
9286 the other its replacement, at the start of a routine. */
9288 initial_elimination_offset (int from, int to)
9290 int regs_saved;
9291 int regs_saved_rounding = 0;
9292 int total_saved_regs_space;
9293 int total_auto_space;
9294 int save_flags = target_flags;
9295 int copy_flags;
9296 HARD_REG_SET live_regs_mask;
9298 shmedia_space_reserved_for_target_registers = false;
9299 regs_saved = calc_live_regs (&live_regs_mask);
9300 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9302 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9304 shmedia_space_reserved_for_target_registers = true;
9305 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9308 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9309 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9310 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9312 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9313 copy_flags = target_flags;
9314 target_flags = save_flags;
9316 total_saved_regs_space = regs_saved + regs_saved_rounding;
9318 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9319 return total_saved_regs_space + total_auto_space
9320 + crtl->args.info.byref_regs * 8;
9322 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9323 return total_saved_regs_space + total_auto_space
9324 + crtl->args.info.byref_regs * 8;
9326 /* Initial gap between fp and sp is 0. */
9327 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9328 return 0;
9330 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9331 return rounded_frame_size (0);
9333 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9334 return rounded_frame_size (0);
9336 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9337 && (to == HARD_FRAME_POINTER_REGNUM
9338 || to == STACK_POINTER_REGNUM));
9339 if (TARGET_SH5)
9341 int n = total_saved_regs_space;
9342 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9343 save_schedule schedule;
9344 save_entry *entry;
9346 n += total_auto_space;
9348 /* If it wasn't saved, there's not much we can do. */
9349 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9350 return n;
9352 target_flags = copy_flags;
9354 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9355 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9356 if (entry->reg == pr_reg)
9358 target_flags = save_flags;
9359 return entry->offset;
9361 gcc_unreachable ();
9363 else
9364 return total_auto_space;
9367 /* Parse the -mfixed-range= option string. */
9368 void
9369 sh_fix_range (const char *const_str)
9371 int i, first, last;
9372 char *str, *dash, *comma;
9374 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9375 REG2 are either register names or register numbers. The effect
9376 of this option is to mark the registers in the range from REG1 to
9377 REG2 as ``fixed'' so they won't be used by the compiler. */
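/* For example (illustrative only; the accepted names are whatever
   decode_reg_name recognizes): -mfixed-range=r10-r13 marks r10..r13 as
   fixed and call-used, and several ranges can be chained with commas,
   e.g. -mfixed-range=r10-r11,r14-r14.  */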
9379 i = strlen (const_str);
9380 str = (char *) alloca (i + 1);
9381 memcpy (str, const_str, i + 1);
9383 while (1)
9385 dash = strchr (str, '-');
9386 if (!dash)
9388 warning (0, "value of -mfixed-range must have form REG1-REG2");
9389 return;
9391 *dash = '\0';
9392 comma = strchr (dash + 1, ',');
9393 if (comma)
9394 *comma = '\0';
9396 first = decode_reg_name (str);
9397 if (first < 0)
9399 warning (0, "unknown register name: %s", str);
9400 return;
9403 last = decode_reg_name (dash + 1);
9404 if (last < 0)
9406 warning (0, "unknown register name: %s", dash + 1);
9407 return;
9410 *dash = '-';
9412 if (first > last)
9414 warning (0, "%s-%s is an empty range", str, dash + 1);
9415 return;
9418 for (i = first; i <= last; ++i)
9419 fixed_regs[i] = call_used_regs[i] = 1;
9421 if (!comma)
9422 break;
9424 *comma = ',';
9425 str = comma + 1;
9429 /* Insert any deferred function attributes from earlier pragmas. */
9430 static void
9431 sh_insert_attributes (tree node, tree *attributes)
9433 tree attrs;
9435 if (TREE_CODE (node) != FUNCTION_DECL)
9436 return;
9438 /* We are only interested in fields. */
9439 if (!DECL_P (node))
9440 return;
9442 /* Append the attributes to the deferred attributes. */
9443 *sh_deferred_function_attributes_tail = *attributes;
9444 attrs = sh_deferred_function_attributes;
9445 if (!attrs)
9446 return;
9448 /* Some attributes imply or require the interrupt attribute. */
9449 if (!lookup_attribute ("interrupt_handler", attrs)
9450 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9452 /* If we have a trapa_handler, but no interrupt_handler attribute,
9453 insert an interrupt_handler attribute. */
9454 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9455 /* We can't use sh_pr_interrupt here because that's not in the
9456 java frontend. */
9457 attrs
9458 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9459 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9460 if the interrupt attribute is missing, we ignore the attribute
9461 and warn. */
9462 else if (lookup_attribute ("sp_switch", attrs)
9463 || lookup_attribute ("trap_exit", attrs)
9464 || lookup_attribute ("nosave_low_regs", attrs)
9465 || lookup_attribute ("resbank", attrs))
9467 tree *tail;
9469 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9471 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9472 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9473 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9474 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9475 warning (OPT_Wattributes,
9476 "%qE attribute only applies to interrupt functions",
9477 TREE_PURPOSE (attrs));
9478 else
9480 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9481 NULL_TREE);
9482 tail = &TREE_CHAIN (*tail);
9485 attrs = *attributes;
9489 /* Install the processed list. */
9490 *attributes = attrs;
9492 /* Clear deferred attributes. */
9493 sh_deferred_function_attributes = NULL_TREE;
9494 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9496 return;
9499 /*------------------------------------------------------------------------------
9500 Target specific attributes
9501 Supported attributes are:
9503 * interrupt_handler
9504 Specifies this function is an interrupt handler.
9506 * trapa_handler
9507 Like interrupt_handler, but don't save all registers.
9509 * sp_switch
9510 Specifies an alternate stack for an interrupt handler to run on.
9512 * trap_exit
9513 Use a trapa to exit an interrupt function instead of rte.
9515 * nosave_low_regs
9516 Don't save r0..r7 in an interrupt handler function.
9517 This is useful on SH3* and SH4*, which have a separate set of low
9518 regs for user and privileged modes.
9519 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9520 those that run with interrupts disabled and thus can't be
9521 interrupted themselves).
9523 * renesas
9524 Use Renesas calling/layout conventions (functions and structures).
9526 * resbank
9527 In case of an interrupt handler function, use a register bank to
9528 save registers R0-R14, MACH, MACL, GBR and PR.
9529 This is available only on SH2A targets.
9531 * function_vector
9532 Declares a function to be called using the TBR relative addressing
9533 mode. Takes an argument that specifies the slot number in the table
9534 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
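   Illustrative declarations (a usage sketch only; the names and argument
   values are made up, but the argument forms match the attribute handlers
   below):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void vec_fn (void) __attribute__ ((function_vector (5)));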
9537 /* Handle a 'resbank' attribute. */
9538 static tree
9539 sh_handle_resbank_handler_attribute (tree * node, tree name,
9540 tree args ATTRIBUTE_UNUSED,
9541 int flags ATTRIBUTE_UNUSED,
9542 bool * no_add_attrs)
9544 if (!TARGET_SH2A)
9546 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9547 name);
9548 *no_add_attrs = true;
9550 if (TREE_CODE (*node) != FUNCTION_DECL)
9552 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9553 name);
9554 *no_add_attrs = true;
9557 return NULL_TREE;
9560 /* Handle an "interrupt_handler" attribute; arguments as in
9561 struct attribute_spec.handler. */
9562 static tree
9563 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9564 tree args ATTRIBUTE_UNUSED,
9565 int flags ATTRIBUTE_UNUSED,
9566 bool *no_add_attrs)
9568 if (TREE_CODE (*node) != FUNCTION_DECL)
9570 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9571 name);
9572 *no_add_attrs = true;
9574 else if (TARGET_SHCOMPACT)
9576 error ("attribute interrupt_handler is not compatible with -m5-compact");
9577 *no_add_attrs = true;
9580 return NULL_TREE;
9583 /* Handle a 'function_vector' attribute; arguments as in
9584 struct attribute_spec.handler. */
9585 static tree
9586 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9587 tree args ATTRIBUTE_UNUSED,
9588 int flags ATTRIBUTE_UNUSED,
9589 bool * no_add_attrs)
9591 if (!TARGET_SH2A)
9593 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9594 name);
9595 *no_add_attrs = true;
9597 else if (TREE_CODE (*node) != FUNCTION_DECL)
9599 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9600 name);
9601 *no_add_attrs = true;
9603 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9605 /* The argument must be a constant integer. */
9606 warning (OPT_Wattributes,
9607 "%qE attribute argument not an integer constant",
9608 name);
9609 *no_add_attrs = true;
9611 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9613 /* The argument value must be between 0 and 255. */
9614 warning (OPT_Wattributes,
9615 "%qE attribute argument should be between 0 to 255",
9616 name);
9617 *no_add_attrs = true;
9619 return NULL_TREE;
9622 /* Returns true if current function has been assigned the attribute
9623 'function_vector'. */
9624 bool
9625 sh2a_is_function_vector_call (rtx x)
9627 if (GET_CODE (x) == SYMBOL_REF
9628 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9630 tree tr = SYMBOL_REF_DECL (x);
9632 if (sh2a_function_vector_p (tr))
9633 return true;
9636 return false;
9639 /* Returns the function vector number, if the attribute
9640 'function_vector' is assigned, otherwise returns zero. */
9641 int
9642 sh2a_get_function_vector_number (rtx x)
9644 int num;
9645 tree list, t;
9647 if ((GET_CODE (x) == SYMBOL_REF)
9648 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9650 t = SYMBOL_REF_DECL (x);
9652 if (TREE_CODE (t) != FUNCTION_DECL)
9653 return 0;
9655 list = SH_ATTRIBUTES (t);
9656 while (list)
9658 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9660 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9661 return num;
9664 list = TREE_CHAIN (list);
9667 return 0;
9669 else
9670 return 0;
9673 /* Handle an "sp_switch" attribute; arguments as in
9674 struct attribute_spec.handler. */
9675 static tree
9676 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9677 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9679 if (TREE_CODE (*node) != FUNCTION_DECL)
9681 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9682 name);
9683 *no_add_attrs = true;
9685 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9687 /* The argument must be a constant string. */
9688 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9689 name);
9690 *no_add_attrs = true;
9693 return NULL_TREE;
9696 /* Handle a "trap_exit" attribute; arguments as in
9697 struct attribute_spec.handler. */
9698 static tree
9699 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9700 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9702 if (TREE_CODE (*node) != FUNCTION_DECL)
9704 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9705 name);
9706 *no_add_attrs = true;
9708 /* The argument specifies a trap number to be used in a trapa instruction
9709 at function exit (instead of an rte instruction). */
9710 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9712 /* The argument must be a constant integer. */
9713 warning (OPT_Wattributes, "%qE attribute argument not an "
9714 "integer constant", name);
9715 *no_add_attrs = true;
9718 return NULL_TREE;
9721 static tree
9722 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9723 tree name ATTRIBUTE_UNUSED,
9724 tree args ATTRIBUTE_UNUSED,
9725 int flags ATTRIBUTE_UNUSED,
9726 bool *no_add_attrs ATTRIBUTE_UNUSED)
9728 return NULL_TREE;
9731 /* True if __attribute__((renesas)) or -mrenesas. */
9732 bool
9733 sh_attr_renesas_p (const_tree td)
9735 if (TARGET_HITACHI)
9736 return true;
9737 if (td == NULL_TREE)
9738 return false;
9739 if (DECL_P (td))
9740 td = TREE_TYPE (td);
9741 if (td == error_mark_node)
9742 return false;
9743 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9744 != NULL_TREE);
9747 /* True if __attribute__((renesas)) or -mrenesas, for the current
9748 function. */
9749 bool
9750 sh_cfun_attr_renesas_p (void)
9752 return sh_attr_renesas_p (current_function_decl);
9755 /* Returns true if the current function has the "interrupt_handler"
9756 attribute set. */
9757 bool
9758 sh_cfun_interrupt_handler_p (void)
9760 return (lookup_attribute ("interrupt_handler",
9761 DECL_ATTRIBUTES (current_function_decl))
9762 != NULL_TREE);
9765 /* Returns true if FUNC has been assigned the attribute
9766 "function_vector". */
9767 bool
9768 sh2a_function_vector_p (tree func)
9770 tree list;
9771 if (TREE_CODE (func) != FUNCTION_DECL)
9772 return false;
9774 list = SH_ATTRIBUTES (func);
9775 while (list)
9777 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9778 return true;
9780 list = TREE_CHAIN (list);
9782 return false;
9785 /* Returns true if the current function has the "resbank" attribute set. */
9786 bool
9787 sh_cfun_resbank_handler_p (void)
9789 return ((lookup_attribute ("resbank",
9790 DECL_ATTRIBUTES (current_function_decl))
9791 != NULL_TREE)
9792 && (lookup_attribute ("interrupt_handler",
9793 DECL_ATTRIBUTES (current_function_decl))
9794 != NULL_TREE) && TARGET_SH2A);
9797 /* Returns true if the current function has a "trap_exit" attribute set. */
9798 bool
9799 sh_cfun_trap_exit_p (void)
9801 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9802 != NULL_TREE;
9805 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9806 static const char *
9807 sh_check_pch_target_flags (int old_flags)
9809 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9810 | MASK_SH_E | MASK_HARD_SH4
9811 | MASK_FPU_SINGLE | MASK_SH4))
9812 return _("created and used with different architectures / ABIs");
9813 if ((old_flags ^ target_flags) & MASK_HITACHI)
9814 return _("created and used with different ABIs");
9815 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9816 return _("created and used with different endianness");
9817 return NULL;
9820 /* Predicates used by the templates. */
9822 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9823 Used only in general_movsrc_operand. */
9824 bool
9825 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9827 switch (REGNO (op))
9829 case PR_REG:
9830 case MACL_REG:
9831 case MACH_REG:
9832 return true;
9834 return false;
9837 /* Returns true if OP is a floating point value with value 0.0. */
9838 bool
9839 fp_zero_operand (rtx op)
9841 REAL_VALUE_TYPE r;
9843 if (GET_MODE (op) != SFmode)
9844 return false;
9846 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9847 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9850 /* Returns true if OP is a floating point value with value 1.0. */
9851 bool
9852 fp_one_operand (rtx op)
9854 REAL_VALUE_TYPE r;
9856 if (GET_MODE (op) != SFmode)
9857 return false;
9859 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9860 return REAL_VALUES_EQUAL (r, dconst1);
9863 /* In general mode switching is used. If we are
9864 compiling without -mfmovd, movsf_ie isn't taken into account for
9865 mode switching. We could check in machine_dependent_reorg for
9866 cases where we know we are in single precision mode, but there is
9867 no interface to find that out during reload, so we must avoid
9868 choosing an fldi alternative during reload and thus failing to
9869 allocate a scratch register for the constant loading. */
9870 bool
9871 fldi_ok (void)
9873 return true;
9876 /* Return the TLS type for TLS symbols. */
9877 enum tls_model
9878 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9880 if (GET_CODE (op) != SYMBOL_REF)
9881 return TLS_MODEL_NONE;
9882 return SYMBOL_REF_TLS_MODEL (op);
9885 /* Return the destination address of a branch. */
9886 static int
9887 branch_dest (rtx branch)
9889 rtx dest = SET_SRC (PATTERN (branch));
9890 int dest_uid;
9892 if (GET_CODE (dest) == IF_THEN_ELSE)
9893 dest = XEXP (dest, 1);
9894 dest = XEXP (dest, 0);
9895 dest_uid = INSN_UID (dest);
9896 return INSN_ADDRESSES (dest_uid);
9899 /* Return nonzero if REG is not used after INSN.
9900 We assume REG is a reload reg, and therefore does
9901 not live past labels. It may live past calls or jumps though. */
9902 bool
9903 reg_unused_after (rtx reg, rtx insn)
9905 enum rtx_code code;
9906 rtx set;
9908 /* If the reg is set by this instruction, then it is safe for our
9909 case. Disregard the case where this is a store to memory, since
9910 we are checking a register used in the store address. */
9911 set = single_set (insn);
9912 if (set && !MEM_P (SET_DEST (set))
9913 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9914 return true;
9916 while ((insn = NEXT_INSN (insn)))
9918 rtx set;
9919 if (!INSN_P (insn))
9920 continue;
9922 code = GET_CODE (insn);
9924 #if 0
9925 /* If this is a label that existed before reload, then the register
9926 is dead here. However, if this is a label added by reorg, then
9927 the register may still be live here. We can't tell the difference,
9928 so we just ignore labels completely. */
9929 if (code == CODE_LABEL)
9930 return 1;
9931 /* else */
9932 #endif
9934 if (code == JUMP_INSN)
9935 return false;
9937 /* If this is a sequence, we must handle them all at once.
9938 We could have for instance a call that sets the target register,
9939 and an insn in a delay slot that uses the register. In this case,
9940 we must return 0. */
9941 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9943 int i;
9944 int retval = 0;
9946 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9948 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9949 rtx set = single_set (this_insn);
9951 if (CALL_P (this_insn))
9952 code = CALL_INSN;
9953 else if (JUMP_P (this_insn))
9955 if (INSN_ANNULLED_BRANCH_P (this_insn))
9956 return false;
9957 code = JUMP_INSN;
9960 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9961 return false;
9962 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9964 if (!MEM_P (SET_DEST (set)))
9965 retval = true;
9966 else
9967 return false;
9969 if (set == NULL_RTX
9970 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9971 return false;
9973 if (retval == 1)
9974 return true;
9975 else if (code == JUMP_INSN)
9976 return false;
9979 set = single_set (insn);
9980 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9981 return false;
9982 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9983 return !MEM_P (SET_DEST (set));
9984 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9985 return false;
9987 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9988 return true;
9990 return true;
9993 #include "ggc.h"
9995 static GTY(()) rtx t_reg_rtx;
9996 rtx
9997 get_t_reg_rtx (void)
9999 if (! t_reg_rtx)
10000 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10001 return t_reg_rtx;
10004 static GTY(()) rtx fpscr_rtx;
10005 rtx
10006 get_fpscr_rtx (void)
10008 if (! fpscr_rtx)
10010 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
10011 REG_USERVAR_P (fpscr_rtx) = 1;
10012 mark_user_reg (fpscr_rtx);
10014 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
10015 mark_user_reg (fpscr_rtx);
10016 return fpscr_rtx;
10019 static GTY(()) tree fpscr_values;
10021 static void
10022 emit_fpu_switch (rtx scratch, int index)
10024 rtx dst, src;
10026 if (fpscr_values == NULL)
10028 tree t;
10030 t = build_index_type (integer_one_node);
10031 t = build_array_type (integer_type_node, t);
10032 t = build_decl (BUILTINS_LOCATION,
10033 VAR_DECL, get_identifier ("__fpscr_values"), t);
10034 DECL_ARTIFICIAL (t) = 1;
10035 DECL_IGNORED_P (t) = 1;
10036 DECL_EXTERNAL (t) = 1;
10037 TREE_STATIC (t) = 1;
10038 TREE_PUBLIC (t) = 1;
10039 TREE_USED (t) = 1;
10041 fpscr_values = t;
10044 src = DECL_RTL (fpscr_values);
10045 if (!can_create_pseudo_p ())
10047 emit_move_insn (scratch, XEXP (src, 0));
10048 if (index != 0)
10049 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10050 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
10052 else
10053 src = adjust_address (src, PSImode, index * 4);
10055 dst = get_fpscr_rtx ();
10056 emit_move_insn (dst, src);
10059 void
10060 emit_sf_insn (rtx pat)
10062 emit_insn (pat);
10065 void
10066 emit_df_insn (rtx pat)
10068 emit_insn (pat);
10071 void
10072 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10074 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10077 void
10078 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10080 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
10081 get_fpscr_rtx ()));
10084 void
10085 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10087 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10090 void
10091 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10093 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
10094 get_fpscr_rtx ()));
10097 static rtx get_free_reg (HARD_REG_SET);
10099 /* This function returns a register to use to load the address to load
10100 the fpscr from. Currently it always returns r1 or r7, but when we are
10101 able to use pseudo registers after combine, or have a better mechanism
10102 for choosing a register, it should be done here. */
10103 /* REGS_LIVE is the liveness information for the point for which we
10104 need this allocation. In some bare-bones exit blocks, r1 is live at the
10105 start. We can even have all of r0..r3 being live:
10106 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10107 The insn before which new insns are placed will clobber the register
10108 we return. If a basic block consists only of setting the return value
10109 register to a pseudo and using that register, the return value is not
10110 live before or after this block, yet we'll insert our insns right in
10111 the middle. */
10112 static rtx
10113 get_free_reg (HARD_REG_SET regs_live)
10115 if (! TEST_HARD_REG_BIT (regs_live, 1))
10116 return gen_rtx_REG (Pmode, 1);
10118 /* Hard reg 1 is live; since this is a small register classes target,
10119 there shouldn't be anything but a jump before the function end. */
10120 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10121 return gen_rtx_REG (Pmode, 7);
10124 /* This function will set the fpscr from memory.
10125 MODE is the mode we are setting it to. */
10126 void
10127 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10129 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10130 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10131 rtx addr_reg;
10133 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10134 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10137 /* Is the given character a logical line separator for the assembler? */
10138 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10139 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10140 #endif
10142 static bool
10143 sequence_insn_p (rtx insn)
10145 rtx prev, next;
10147 prev = PREV_INSN (insn);
10148 if (prev == NULL)
10149 return false;
10151 next = NEXT_INSN (prev);
10152 if (next == NULL)
10153 return false;
10155 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10158 int
10159 sh_insn_length_adjustment (rtx insn)
10161 /* Instructions with unfilled delay slots take up an extra two bytes for
10162 the nop in the delay slot. */
10163 if (((NONJUMP_INSN_P (insn)
10164 && GET_CODE (PATTERN (insn)) != USE
10165 && GET_CODE (PATTERN (insn)) != CLOBBER)
10166 || CALL_P (insn) || JUMP_P (insn))
10167 && ! sequence_insn_p (insn)
10168 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10169 return 2;
10171 /* SH2e has a bug that prevents the use of annulled branches, so if
10172 the delay slot is not filled, we'll have to put a NOP in it. */
10173 if (sh_cpu_attr == CPU_SH2E
10174 && JUMP_P (insn)
10175 && get_attr_type (insn) == TYPE_CBRANCH
10176 && ! sequence_insn_p (insn))
10177 return 2;
10179 /* sh-dsp parallel processing insn take four bytes instead of two. */
10181 if (NONJUMP_INSN_P (insn))
10183 int sum = 0;
10184 rtx body = PATTERN (insn);
10185 const char *templ;
10186 char c;
10187 bool maybe_label = true;
10189 if (GET_CODE (body) == ASM_INPUT)
10190 templ = XSTR (body, 0);
10191 else if (asm_noperands (body) >= 0)
10192 templ
10193 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10194 else
10195 return 0;
10198 int ppi_adjust = 0;
10201 c = *templ++;
10202 while (c == ' ' || c == '\t');
10203 /* all sh-dsp parallel-processing insns start with p.
10204 The only non-ppi sh insn starting with p is pref.
10205 The only ppi starting with pr is prnd. */
10206 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10207 ppi_adjust = 2;
10208 /* The repeat pseudo-insn expands to three insns, a total of
10209 six bytes in size. */
10210 else if ((c == 'r' || c == 'R')
10211 && ! strncasecmp ("epeat", templ, 5))
10212 ppi_adjust = 4;
10213 while (c && c != '\n'
10214 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10216 /* If this is a label, it is obviously not a ppi insn. */
10217 if (c == ':' && maybe_label)
10219 ppi_adjust = 0;
10220 break;
10222 else if (c == '\'' || c == '"')
10223 maybe_label = false;
10224 c = *templ++;
10226 sum += ppi_adjust;
10227 maybe_label = c != ':';
10229 while (c);
10230 return sum;
10232 return 0;
10235 /* Return TRUE for a valid displacement for the REG+disp addressing
10236 with MODE. */
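/* For example (illustrative; see sh_max_mov_insn_displacement and
   mov_insn_alignment_mask): a non-SH2A SImode access allows displacements
   0, 4, ..., 60, i.e. multiples of the access size up to 15 slots away.  */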
10237 bool
10238 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10239 bool allow_zero)
10241 if (! CONST_INT_P (op))
10242 return false;
10244 if (TARGET_SHMEDIA)
10246 int size;
10248 /* Check if this is the address of an unaligned load / store. */
10249 if (mode == VOIDmode)
10250 return satisfies_constraint_I06 (op);
10252 size = GET_MODE_SIZE (mode);
10253 return (!(INTVAL (op) & (size - 1))
10254 && INTVAL (op) >= -512 * size
10255 && INTVAL (op) < 512 * size);
10257 else
10259 const HOST_WIDE_INT offset = INTVAL (op);
10260 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10261 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10263 /* If the mode does not support any displacement, always return false.
10264 Even though an index of '0' is actually always valid, it will cause
10265 troubles when e.g. a DFmode move is split into two SFmode moves,
10266 where one SFmode move will have index '0' and the other move will
10267 have index '4'. */
10268 if (!allow_zero && max_disp < 1)
10269 return false;
10271 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10275 /* Recognize an RTL expression that is a valid memory address for
10276 an instruction.
10277 The MODE argument is the machine mode for the MEM expression
10278 that wants to use this address.
10279 Allow REG
10280 REG+disp
10281 REG+r0
10282 REG++
10283 --REG
10285 GBR+disp */
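/* For illustration, these correspond to RTL forms such as (reg Rn),
   (plus (reg Rn) (const_int d)), (plus (reg Rn) (reg R0)),
   (post_inc (reg Rn)), (pre_dec (reg Rn)) and
   (plus (reg GBR) (const_int d)), matching the checks below.  */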
10286 static bool
10287 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10289 if (! ALLOW_INDEXED_ADDRESS
10290 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10291 return false;
10293 if (REG_P (x) && REGNO (x) == GBR_REG)
10294 return true;
10296 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10297 return true;
10298 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10299 && ! TARGET_SHMEDIA
10300 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10301 return true;
10302 else if (GET_CODE (x) == PLUS
10303 && (mode != PSImode || reload_completed))
10305 rtx xop0 = XEXP (x, 0);
10306 rtx xop1 = XEXP (x, 1);
10308 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10309 return gbr_displacement (xop1, mode);
10311 if (GET_MODE_SIZE (mode) <= 8
10312 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10313 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10314 return true;
10316 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10317 || ((xop0 == stack_pointer_rtx
10318 || xop0 == hard_frame_pointer_rtx)
10319 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10320 || ((xop1 == stack_pointer_rtx
10321 || xop1 == hard_frame_pointer_rtx)
10322 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10323 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10324 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10325 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10326 && TARGET_FMOVD && mode == DFmode)))
10328 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10329 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10330 return true;
10331 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10332 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10333 return true;
10337 return false;
10340 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10341 isn't protected by a PIC unspec. */
10342 bool
10343 nonpic_symbol_mentioned_p (rtx x)
10345 const char *fmt;
10346 int i;
10348 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10349 || GET_CODE (x) == PC)
10350 return true;
10352 /* We don't want to look into the possible MEM location of a
10353 CONST_DOUBLE, since we're not going to use it, in general. */
10354 if (GET_CODE (x) == CONST_DOUBLE)
10355 return false;
10357 if (GET_CODE (x) == UNSPEC
10358 && (XINT (x, 1) == UNSPEC_PIC
10359 || XINT (x, 1) == UNSPEC_GOT
10360 || XINT (x, 1) == UNSPEC_GOTOFF
10361 || XINT (x, 1) == UNSPEC_GOTPLT
10362 || XINT (x, 1) == UNSPEC_GOTTPOFF
10363 || XINT (x, 1) == UNSPEC_DTPOFF
10364 || XINT (x, 1) == UNSPEC_TPOFF
10365 || XINT (x, 1) == UNSPEC_PLT
10366 || XINT (x, 1) == UNSPEC_SYMOFF
10367 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10368 return false;
10370 fmt = GET_RTX_FORMAT (GET_CODE (x));
10371 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10373 if (fmt[i] == 'E')
10375 int j;
10376 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10377 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10378 return true;
10380 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10381 return true;
10384 return false;
10387 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10388 @GOTOFF in `reg'. */
10389 rtx
10390 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10391 rtx reg)
10393 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10394 return orig;
10396 if (GET_CODE (orig) == LABEL_REF
10397 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10399 if (reg == NULL_RTX)
10400 reg = gen_reg_rtx (Pmode);
10402 emit_insn (gen_symGOTOFF2reg (reg, orig));
10403 return reg;
10405 else if (GET_CODE (orig) == SYMBOL_REF)
10407 if (reg == NULL_RTX)
10408 reg = gen_reg_rtx (Pmode);
10410 emit_insn (gen_symGOT2reg (reg, orig));
10411 return reg;
10413 return orig;
10416 /* Given a (logical) mode size and an offset in bytes, try to find the
10417 appropriate displacement value for a mov insn. On SH the displacements
10418 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10419 15 bytes in QImode. To compensate for this we create a new base address by
10420 adding an adjustment value to it.
10422 If the originally requested offset is greater than 127 we prefer using
10423 values 124..127 over 128..131 to increase opportunities to use the
10424 add #imm, Rn insn.
10426 In some cases it is possible that a requested offset might seem unaligned
10427 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10428 This is compensated by adjusting the base address so that the effective
10429 address of the displacement move insn will be aligned.
10431 This is not the best possible way of rebasing the base address, as it
10432 does not look at other present displacement addressings around it.
10433 In some cases this can create more base address adjustments than would
10434 actually be necessary. */
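/* A worked example (illustrative numbers only): with the 60 byte SImode
   limit, a requested offset of 68 yields offset_adjust = 64 and
   mov_disp = 4, i.e. the base register is advanced by 64 and the move
   itself uses a displacement of 4.  */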
10435 struct disp_adjust
10437 rtx offset_adjust;
10438 rtx mov_disp;
10441 static struct disp_adjust
10442 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10444 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10446 /* Do not try to use SH2A's large displacements here, because this would
10447 effectively disable the small displacement insns. */
10448 const int mode_sz = GET_MODE_SIZE (mode);
10449 const int mov_insn_sz = mov_insn_size (mode, false);
10450 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10451 const int max_disp_next = max_disp + mov_insn_sz;
10452 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10453 HOST_WIDE_INT offset_adjust;
10455 /* In some cases this actually does happen and we must check for it. */
10456 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10457 return res;
10459 /* Keeps the previous behavior for QImode displacement addressing.
10460 This just decides how the offset is re-based. Removing this special
10461 case will result in slightly bigger code on average, but it's not that
10462 bad actually. */
10463 if (mov_insn_sz == 1)
10464 align_modifier = 0;
10466 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10468 if (mode_sz + offset - offset_adjust <= max_disp_next)
10470 res.offset_adjust = GEN_INT (offset_adjust);
10471 res.mov_disp = GEN_INT (offset - offset_adjust);
10474 return res;
10477 /* Try to modify an illegitimate address and make it legitimate.
10478 If we find one, return the new, valid address.
10479 Otherwise, return the original address. */
10480 static rtx
10481 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10483 if (flag_pic)
10484 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10486 if (TARGET_SHMEDIA)
10487 return x;
10489 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10490 || (TARGET_SH2E && mode == SFmode))
10491 return x;
10493 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10494 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10496 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10497 INTVAL (XEXP (x, 1)));
10499 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10501 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10502 adj.offset_adjust, NULL_RTX, 0,
10503 OPTAB_LIB_WIDEN);
10504 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10508 return x;
10511 /* Attempt to replace *p, which is an address that needs reloading, with
10512 a valid memory address for an operand of mode MODE.
10513 Like for sh_legitimize_address, for the SH we try to get a normal form
10514 of the address. That will allow inheritance of the address reloads. */
10515 bool
10516 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10517 int itype)
10519 enum reload_type type = (enum reload_type) itype;
10520 const int mode_sz = GET_MODE_SIZE (mode);
10522 if (! ALLOW_INDEXED_ADDRESS
10523 && GET_CODE (*p) == PLUS
10524 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10526 *p = copy_rtx (*p);
10527 push_reload (*p, NULL_RTX, p, NULL,
10528 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10529 return true;
10532 if (! ALLOW_INDEXED_ADDRESS
10533 && GET_CODE (*p) == PLUS
10534 && GET_CODE (XEXP (*p, 0)) == PLUS)
10536 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10537 XEXP (XEXP (*p, 0), 1));
10538 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10539 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10540 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10541 return true;
10544 if (TARGET_SHMEDIA)
10545 return false;
10547 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10548 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10549 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10550 && (ALLOW_INDEXED_ADDRESS
10551 || XEXP (*p, 0) == stack_pointer_rtx
10552 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10554 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10555 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10557 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10559 push_reload (*p, NULL_RTX, p, NULL,
10560 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10561 return true;
10564 if (TARGET_SH2E && mode == SFmode)
10566 *p = copy_rtx (*p);
10567 push_reload (*p, NULL_RTX, p, NULL,
10568 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10569 return true;
10572 /* FIXME: Do not allow to legitimize QImode and HImode displacement
10573 moves because then reload has a problem figuring the constraint
10574 that the move insn target/source reg must be R0.
10575 Or maybe some handling is wrong in sh_secondary_reload for this
10576 to work properly? */
10577 if ((mode_sz == 4 || mode_sz == 8)
10578 && ! (TARGET_SH4 && mode == DFmode)
10579 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10581 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10582 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10583 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10584 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10585 return true;
10589 /* We must re-recognize what we created before. */
10590 if (GET_CODE (*p) == PLUS
10591 && (mode_sz == 4 || mode_sz == 8)
10592 && GET_CODE (XEXP (*p, 0)) == PLUS
10593 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10594 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10595 && CONST_INT_P (XEXP (*p, 1))
10596 && ! (TARGET_SH2E && mode == SFmode))
10598 /* Because this address is so complex, we know it must have
10599 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10600 it is already unshared, and needs no further unsharing. */
10601 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10602 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10603 return true;
10606 return false;
10609 /* In the name of slightly smaller debug output, and to cater to
10610 general assembler lossage, recognize various UNSPEC sequences
10611 and turn them back into a direct symbol reference. */
10612 static rtx
10613 sh_delegitimize_address (rtx orig_x)
10615 rtx x, y;
10617 orig_x = delegitimize_mem_from_attrs (orig_x);
10619 x = orig_x;
10620 if (MEM_P (x))
10621 x = XEXP (x, 0);
10622 if (GET_CODE (x) == CONST)
10624 y = XEXP (x, 0);
10625 if (GET_CODE (y) == UNSPEC)
10627 if (XINT (y, 1) == UNSPEC_GOT
10628 || XINT (y, 1) == UNSPEC_GOTOFF
10629 || XINT (y, 1) == UNSPEC_SYMOFF)
10630 return XVECEXP (y, 0, 0);
10631 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10633 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10635 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10637 if (GET_CODE (symplt) == UNSPEC
10638 && XINT (symplt, 1) == UNSPEC_PLT)
10639 return XVECEXP (symplt, 0, 0);
10642 else if (TARGET_SHMEDIA
10643 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10644 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10646 rtx offset = XVECEXP (y, 0, 1);
10648 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10649 if (MEM_P (orig_x))
10650 x = replace_equiv_address_nv (orig_x, x);
10651 return x;
10656 return orig_x;
10659 /* Mark the use of a constant in the literal table. If the constant
10660 has multiple labels, make it unique. */
10661 static rtx
10662 mark_constant_pool_use (rtx x)
10664 rtx insn, lab, pattern;
10666 if (x == NULL_RTX)
10667 return x;
10669 switch (GET_CODE (x))
10671 case LABEL_REF:
10672 x = XEXP (x, 0);
10673 case CODE_LABEL:
10674 break;
10675 default:
10676 return x;
10679 /* Get the first label in the list of labels for the same constant
10680 and delete the other labels in the list. */
10681 lab = x;
10682 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10684 if (!LABEL_P (insn)
10685 || LABEL_REFS (insn) != NEXT_INSN (insn))
10686 break;
10687 lab = insn;
10690 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10691 INSN_DELETED_P (insn) = 1;
10693 /* Mark constants in a window. */
10694 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10696 if (!NONJUMP_INSN_P (insn))
10697 continue;
10699 pattern = PATTERN (insn);
10700 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10701 continue;
10703 switch (XINT (pattern, 1))
10705 case UNSPECV_CONST2:
10706 case UNSPECV_CONST4:
10707 case UNSPECV_CONST8:
10708 XVECEXP (pattern, 0, 1) = const1_rtx;
10709 break;
10710 case UNSPECV_WINDOW_END:
10711 if (XVECEXP (pattern, 0, 0) == x)
10712 return lab;
10713 break;
10714 case UNSPECV_CONST_END:
10715 return lab;
10716 default:
10717 break;
10721 return lab;
10724 /* Return true if it's possible to redirect BRANCH1 to the destination
10725 of an unconditional jump BRANCH2. We only want to do this if the
10726 resulting branch will have a short displacement. */
10727 bool
10728 sh_can_redirect_branch (rtx branch1, rtx branch2)
10730 if (flag_expensive_optimizations && simplejump_p (branch2))
10732 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10733 rtx insn;
10734 int distance;
10736 for (distance = 0, insn = NEXT_INSN (branch1);
10737 insn && distance < 256;
10738 insn = PREV_INSN (insn))
10740 if (insn == dest)
10741 return true;
10742 else
10743 distance += get_attr_length (insn);
10745 for (distance = 0, insn = NEXT_INSN (branch1);
10746 insn && distance < 256;
10747 insn = NEXT_INSN (insn))
10749 if (insn == dest)
10750 return true;
10751 else
10752 distance += get_attr_length (insn);
10755 return false;
10758 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10759 bool
10760 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10761 unsigned int new_reg)
10763 /* Interrupt functions can only use registers that have already been
10764 saved by the prologue, even if they would normally be
10765 call-clobbered. */
10766 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10767 return false;
10769 return true;
10772 /* Function to update the integer COST
10773 based on the relationship between INSN that is dependent on
10774 DEP_INSN through the dependence LINK. The default is to make no
10775 adjustment to COST. This can be used for example to specify to
10776 the scheduler that an output- or anti-dependence does not incur
10777 the same cost as a data-dependence. The return value should be
10778 the new value for COST. */
10779 static int
10780 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10782 rtx reg, use_pat;
10784 if (TARGET_SHMEDIA)
10786 /* On SHmedia, if the dependence is an anti-dependence or
10787 output-dependence, there is no cost. */
10788 if (REG_NOTE_KIND (link) != 0)
10790 /* However, dependencies between target register loads and
10791 uses of the register in a subsequent block that are separated
10792 by a conditional branch are not modelled - we have to make do with
10793 the anti-dependency between the target register load and the
10794 conditional branch that ends the current block. */
10795 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10796 && GET_CODE (PATTERN (dep_insn)) == SET
10797 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10798 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10799 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10801 int orig_cost = cost;
10802 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10803 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10804 ? insn : JUMP_LABEL (insn));
10805 /* On the likely path, the branch costs 1, on the unlikely path,
10806 it costs 3. */
10807 cost--;
10809 target = next_active_insn (target);
10810 while (target && ! flow_dependent_p (target, dep_insn)
10811 && --cost > 0);
10812 /* If two branches are executed in immediate succession, with the
10813 first branch properly predicted, this causes a stall at the
10814 second branch, hence we won't need the target for the
10815 second branch for two cycles after the launch of the first
10816 branch. */
10817 if (cost > orig_cost - 2)
10818 cost = orig_cost - 2;
10820 else
10821 cost = 0;
10824 else if (get_attr_is_mac_media (insn)
10825 && get_attr_is_mac_media (dep_insn))
10826 cost = 1;
10828 else if (! reload_completed
10829 && GET_CODE (PATTERN (insn)) == SET
10830 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10831 && GET_CODE (PATTERN (dep_insn)) == SET
10832 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10833 && cost < 4)
10834 cost = 4;
10835 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10836 that is needed at the target. */
10837 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10838 && ! flow_dependent_p (insn, dep_insn))
10839 cost--;
10841 else if (REG_NOTE_KIND (link) == 0)
10843 enum attr_type type;
10844 rtx dep_set;
10846 if (recog_memoized (insn) < 0
10847 || recog_memoized (dep_insn) < 0)
10848 return cost;
10850 dep_set = single_set (dep_insn);
10852 /* The latency that we specify in the scheduling description refers
10853 to the actual output, not to an auto-increment register; for that,
10854 the latency is one. */
10855 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10857 rtx set = single_set (insn);
10859 if (set
10860 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10861 && (!MEM_P (SET_DEST (set))
10862 || !reg_mentioned_p (SET_DEST (dep_set),
10863 XEXP (SET_DEST (set), 0))))
10864 cost = 1;
10866 /* The only input for a call that is timing-critical is the
10867 function's address. */
10868 if (CALL_P (insn))
10870 rtx call = get_call_rtx_from (insn);
10871 if (call
10872 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10873 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10874 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10875 cost -= TARGET_SH4_300 ? 3 : 6;
10877 /* Likewise, the most timing-critical input for an sfunc call
10878 is the function address. However, sfuncs typically start
10879 using their arguments pretty quickly.
10880 Assume a four cycle delay for SH4 before they are needed.
10881 Cached ST40-300 calls are quicker, so assume only a one
10882 cycle delay there.
10883 ??? Maybe we should encode the delays till input registers
10884 are needed by sfuncs into the sfunc call insn. */
10885 /* All sfunc calls are parallels with at least four components.
10886 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10887 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10888 && XVECLEN (PATTERN (insn), 0) >= 4
10889 && (reg = sfunc_uses_reg (insn)))
10891 if (! reg_set_p (reg, dep_insn))
10892 cost -= TARGET_SH4_300 ? 1 : 4;
10894 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10896 enum attr_type dep_type = get_attr_type (dep_insn);
10898 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10899 cost--;
10900 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10901 && (type = get_attr_type (insn)) != TYPE_CALL
10902 && type != TYPE_SFUNC)
10903 cost--;
10904 /* When the preceding instruction loads the shift amount of
10905 the following SHAD/SHLD, the latency of the load is increased
10906 by 1 cycle. */
10907 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10908 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10909 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10910 XEXP (SET_SRC (single_set (insn)),
10911 1)))
10912 cost++;
10913 /* When an LS group instruction with a latency of less than
10914 3 cycles is followed by a double-precision floating-point
10915 instruction, FIPR, or FTRV, the latency of the first
10916 instruction is increased to 3 cycles. */
10917 else if (cost < 3
10918 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10919 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10920 cost = 3;
10921 /* The lsw register of a double-precision computation is ready one
10922 cycle earlier. */
10923 else if (reload_completed
10924 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10925 && (use_pat = single_set (insn))
10926 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10927 SET_SRC (use_pat)))
10928 cost -= 1;
10930 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10931 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10932 cost -= 1;
10934 else if (TARGET_SH4_300)
10936 /* Stores need their input register two cycles later. */
10937 if (dep_set && cost >= 1
10938 && ((type = get_attr_type (insn)) == TYPE_STORE
10939 || type == TYPE_PSTORE
10940 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10942 rtx set = single_set (insn);
10944 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10945 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10947 cost -= 2;
10948 /* But don't reduce the cost below 1 if the address depends
10949 on a side effect of dep_insn. */
10950 if (cost < 1
10951 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10952 cost = 1;
10957 /* An anti-dependence penalty of two applies if the first insn is a double
10958 precision fadd / fsub / fmul. */
10959 else if (!TARGET_SH4_300
10960 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10961 && recog_memoized (dep_insn) >= 0
10962 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10963 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10964 /* A lot of alleged anti-flow dependences are fake,
10965 so check this one is real. */
10966 && flow_dependent_p (dep_insn, insn))
10967 cost = 2;
10969 return cost;
10972 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10973 if DEP_INSN is anti-flow dependent on INSN. */
10974 static bool
10975 flow_dependent_p (rtx insn, rtx dep_insn)
10977 rtx tmp = PATTERN (insn);
10979 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10980 return tmp == NULL_RTX;
10983 /* A helper function for flow_dependent_p called through note_stores. */
10984 static void
10985 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10987 rtx * pinsn = (rtx *) data;
10989 if (*pinsn && reg_referenced_p (x, *pinsn))
10990 *pinsn = NULL_RTX;
10993 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10994 'special function' patterns (type sfunc) that clobber pr, but that
10995 do not look like function calls to leaf_function_p. Hence we must
10996 do this extra check. */
10997 static int
10998 sh_pr_n_sets (void)
11000 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11003 /* Return where to allocate pseudo for a given hard register initial
11004 value. */
11005 static rtx
11006 sh_allocate_initial_value (rtx hard_reg)
11008 rtx x;
11010 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11012 if (crtl->is_leaf
11013 && ! sh_pr_n_sets ()
11014 && ! (TARGET_SHCOMPACT
11015 && ((crtl->args.info.call_cookie
11016 & ~ CALL_COOKIE_RET_TRAMP (1))
11017 || crtl->saves_all_registers)))
11018 x = hard_reg;
11019 else
11020 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11022 else
11023 x = NULL_RTX;
11025 return x;
11028 /* This function returns "2" to indicate dual issue for the SH4
11029 processor. To be used by the DFA pipeline description. */
11030 static int
11031 sh_issue_rate (void)
11033 if (TARGET_SUPERSCALAR)
11034 return 2;
11035 else
11036 return 1;
11039 /* Functions for ready queue reordering for sched1. */
11041 /* Get weight for mode for a set x. */
11042 static short
11043 find_set_regmode_weight (rtx x, enum machine_mode mode)
11045 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11046 return 1;
11047 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11049 if (REG_P (SET_DEST (x)))
11051 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11052 return 1;
11053 else
11054 return 0;
11056 return 1;
11058 return 0;
11061 /* Get regmode weight for insn. */
11062 static short
11063 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
11065 short reg_weight = 0;
11066 rtx x;
11068 /* Increment weight for each register born here. */
11069 x = PATTERN (insn);
11070 reg_weight += find_set_regmode_weight (x, mode);
11071 if (GET_CODE (x) == PARALLEL)
11073 int j;
11074 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11076 x = XVECEXP (PATTERN (insn), 0, j);
11077 reg_weight += find_set_regmode_weight (x, mode);
11080 /* Decrement weight for each register that dies here. */
11081 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11083 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11085 rtx note = XEXP (x, 0);
11086 if (REG_P (note) && GET_MODE (note) == mode)
11087 reg_weight--;
11090 return reg_weight;
11093 /* Calculate regmode weights for all insns of a basic block. */
11094 static void
11095 find_regmode_weight (basic_block b, enum machine_mode mode)
11097 rtx_insn *insn, *next_tail, *head, *tail;
11099 get_ebb_head_tail (b, b, &head, &tail);
11100 next_tail = NEXT_INSN (tail);
11102 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11104 /* Handle register life information. */
11105 if (!INSN_P (insn))
11106 continue;
11108 if (mode == SFmode)
11109 INSN_REGMODE_WEIGHT (insn, mode) =
11110 find_insn_regmode_weight (insn, mode)
11111 + 2 * find_insn_regmode_weight (insn, DFmode);
11112 else if (mode == SImode)
11113 INSN_REGMODE_WEIGHT (insn, mode) =
11114 find_insn_regmode_weight (insn, mode)
11115 + 2 * find_insn_regmode_weight (insn, DImode);
11119 /* Comparison function for ready queue sorting. */
11120 static int
11121 rank_for_reorder (const void *x, const void *y)
11123 rtx tmp = *(const rtx *) y;
11124 rtx tmp2 = *(const rtx *) x;
11126 /* The insn in a schedule group should be issued first. */
11127 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11128 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11130 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11131 minimizes instruction movement, thus minimizing sched's effect on
11132 register pressure. */
11133 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11136 /* Resort the array A in which only element at index N may be out of order. */
11137 static void
11138 swap_reorder (rtx *a, int n)
11140 rtx insn = a[n - 1];
11141 int i = n - 2;
11143 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11145 a[i + 1] = a[i];
11146 i -= 1;
11148 a[i + 1] = insn;
11151 /* Sort the ready list by ascending priority. */
11152 static void
11153 ready_reorder (rtx *ready, int nready)
11155 if (nready == 2)
11156 swap_reorder (ready, nready);
11157 else if (nready > 2)
11158 qsort (ready, nready, sizeof (rtx), rank_for_reorder);
11161 /* Count life regions of r0 for a block. */
11162 static int
11163 find_r0_life_regions (basic_block b)
11165 rtx end, insn;
11166 rtx pset;
11167 rtx r0_reg;
11168 int live;
11169 int set;
11170 int death = 0;
11172 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11174 set = 1;
11175 live = 1;
11177 else
11179 set = 0;
11180 live = 0;
11183 insn = BB_HEAD (b);
11184 end = BB_END (b);
11185 r0_reg = gen_rtx_REG (SImode, R0_REG);
11186 while (1)
11188 if (INSN_P (insn))
11190 if (find_regno_note (insn, REG_DEAD, R0_REG))
11192 death++;
11193 live = 0;
11195 if (!live
11196 && (pset = single_set (insn))
11197 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11198 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11200 set++;
11201 live = 1;
11204 if (insn == end)
11205 break;
11206 insn = NEXT_INSN (insn);
11208 return set - death;
11211 /* Calculate regmode weights for all insns of all basic blocks. */
11212 static void
11213 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11214 int verbose ATTRIBUTE_UNUSED,
11215 int old_max_uid)
11217 basic_block b;
11219 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11220 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11221 r0_life_regions = 0;
11223 FOR_EACH_BB_REVERSE_FN (b, cfun)
11225 find_regmode_weight (b, SImode);
11226 find_regmode_weight (b, SFmode);
11227 if (!reload_completed)
11228 r0_life_regions += find_r0_life_regions (b);
11231 CURR_REGMODE_PRESSURE (SImode) = 0;
11232 CURR_REGMODE_PRESSURE (SFmode) = 0;
11235 /* Cleanup. */
11236 static void
11237 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11238 int verbose ATTRIBUTE_UNUSED)
11240 if (regmode_weight[0])
11242 free (regmode_weight[0]);
11243 regmode_weight[0] = NULL;
11245 if (regmode_weight[1])
11247 free (regmode_weight[1]);
11248 regmode_weight[1] = NULL;
11252 /* The scalar modes supported differ from the default version in TImode
11253 for 32-bit SHMEDIA. */
11254 static bool
11255 sh_scalar_mode_supported_p (enum machine_mode mode)
11257 if (TARGET_SHMEDIA32 && mode == TImode)
11258 return false;
11260 return default_scalar_mode_supported_p (mode);
11263 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11264 keep count of register pressures on SImode and SFmode. */
11265 static int
11266 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11267 int sched_verbose ATTRIBUTE_UNUSED,
11268 rtx insn,
11269 int can_issue_more)
11271 if (GET_CODE (PATTERN (insn)) != USE
11272 && GET_CODE (PATTERN (insn)) != CLOBBER)
11273 cached_can_issue_more = can_issue_more - 1;
11274 else
11275 cached_can_issue_more = can_issue_more;
11277 if (reload_completed)
11278 return cached_can_issue_more;
11280 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11281 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11283 return cached_can_issue_more;
11286 static void
11287 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11288 int verbose ATTRIBUTE_UNUSED,
11289 int veclen ATTRIBUTE_UNUSED)
11291 CURR_REGMODE_PRESSURE (SImode) = 0;
11292 CURR_REGMODE_PRESSURE (SFmode) = 0;
11295 /* Some magic numbers. */
11296 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11297 functions that already have high pressure on r0. */
11298 #define R0_MAX_LIFE_REGIONS 2
11299 /* Register Pressure thresholds for SImode and SFmode registers. */
11300 #define SIMODE_MAX_WEIGHT 5
11301 #define SFMODE_MAX_WEIGHT 10
11303 /* Return true if the pressure is high for MODE. */
11304 static bool
11305 high_pressure (enum machine_mode mode)
11307 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11308 functions that already have high pressure on r0. */
11309 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11310 return true;
11312 if (mode == SFmode)
11313 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11314 else
11315 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11318 /* Reorder ready queue if register pressure is high. */
11319 static int
11320 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11321 int sched_verbose ATTRIBUTE_UNUSED,
11322 rtx *ready,
11323 int *n_readyp,
11324 int clock_var ATTRIBUTE_UNUSED)
11326 if (reload_completed)
11327 return sh_issue_rate ();
11329 if (high_pressure (SFmode) || high_pressure (SImode))
11331 ready_reorder (ready, *n_readyp);
11334 return sh_issue_rate ();
11337 /* Skip cycles if the current register pressure is high. */
11338 static int
11339 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11340 int sched_verbose ATTRIBUTE_UNUSED,
11341 rtx *ready ATTRIBUTE_UNUSED,
11342 int *n_readyp ATTRIBUTE_UNUSED,
11343 int clock_var ATTRIBUTE_UNUSED)
11345 if (reload_completed)
11346 return cached_can_issue_more;
11348 if (high_pressure(SFmode) || high_pressure (SImode))
11349 skip_cycles = 1;
11351 return cached_can_issue_more;
11354 /* Skip cycles without sorting the ready queue. This will move insns from
11355 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11356 queue by sh_reorder. */
11358 /* Generally, skipping this many cycles is sufficient for all insns to move
11359 from Q -> R. */
11360 #define MAX_SKIPS 8
11362 static int
11363 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11364 int sched_verbose ATTRIBUTE_UNUSED,
11365 rtx insn ATTRIBUTE_UNUSED,
11366 int last_clock_var,
11367 int clock_var,
11368 int *sort_p)
11370 if (reload_completed)
11371 return 0;
11373 if (skip_cycles)
11375 if ((clock_var - last_clock_var) < MAX_SKIPS)
11377 *sort_p = 0;
11378 return 1;
11380 /* If this is the last cycle we are skipping, allow reordering of R. */
11381 if ((clock_var - last_clock_var) == MAX_SKIPS)
11383 *sort_p = 1;
11384 return 1;
11388 skip_cycles = 0;
11390 return 0;
11393 /* SHmedia requires registers for branches, so we can't generate new
11394 branches past reload. */
11395 static bool
11396 sh_cannot_modify_jumps_p (void)
11398 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11401 static reg_class_t
11402 sh_target_reg_class (void)
11404 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11407 static bool
11408 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11410 if (! shmedia_space_reserved_for_target_registers)
11411 return 0;
11412 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11413 return 0;
11415 HARD_REG_SET dummy;
11416 if (calc_live_regs (&dummy) >= 6 * 8)
11417 return 1;
11418 return 0;
11421 static bool
11422 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11424 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11428 /* On the SH1..SH4, the trampoline looks like
11429 2 0002 D202 mov.l l2,r2
11430 1 0000 D301 mov.l l1,r3
11431 3 0004 422B jmp @r2
11432 4 0006 0009 nop
11433 5 0008 00000000 l1: .long area
11434 6 000c 00000000 l2: .long function
11436 SH5 (compact) uses r1 instead of r3 for the static chain. */
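/* Added illustration (hedged, derived from the SImode stores emitted at the
   end of sh_trampoline_init below): for the plain SH1..SH4 case the four
   words written into the trampoline are
     offset  0: 0xd301d202 (little endian) or 0xd202d301 (big endian)
     offset  4: 0x0009422b (little endian) or 0x422b0009 (big endian)
     offset  8: cxt     (the static chain, l1 above)
     offset 12: fnaddr  (the function address, l2 above)
   i.e. the mov.l/mov.l/jmp/nop opcodes followed by the two data words.  */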
11439 /* Emit RTL insns to initialize the variable parts of a trampoline.
11440 FNADDR is an RTX for the address of the function's pure code.
11441 CXT is an RTX for the static chain value for the function. */
11442 static void
11443 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11445 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11446 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11448 if (TARGET_SHMEDIA64)
11450 rtx tramp_templ;
11451 int fixed_len;
11453 rtx movi1 = GEN_INT (0xcc000010);
11454 rtx shori1 = GEN_INT (0xc8000010);
11455 rtx src, dst;
11457 /* The following trampoline works within a +- 128 KB range for cxt:
11458 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11459 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11460 gettr tr1,r1; blink tr0,r63 */
11461 /* Address rounding makes it hard to compute the exact bounds of the
11462 offset for this trampoline, but we have a rather generous offset
11463 range, so frame_offset should do fine as an upper bound. */
11464 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11466 /* ??? could optimize this trampoline initialization
11467 by writing DImode words with two insns each. */
11468 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11469 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11470 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11471 insn = gen_rtx_AND (DImode, insn, mask);
11472 /* Or in ptb/u .,tr1 pattern */
11473 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11474 insn = force_operand (insn, NULL_RTX);
11475 insn = gen_lowpart (SImode, insn);
11476 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11477 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11478 insn = gen_rtx_AND (DImode, insn, mask);
11479 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11480 insn = gen_lowpart (SImode, insn);
11481 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11482 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11483 insn = gen_rtx_AND (DImode, insn, mask);
11484 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11485 insn = gen_lowpart (SImode, insn);
11486 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11487 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11488 insn = gen_rtx_AND (DImode, insn, mask);
11489 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11490 insn = gen_lowpart (SImode, insn);
11491 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11492 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11493 insn = gen_rtx_AND (DImode, insn, mask);
11494 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11495 insn = gen_lowpart (SImode, insn);
11496 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11497 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11498 GEN_INT (0x6bf10600));
11499 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11500 GEN_INT (0x4415fc10));
11501 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11502 GEN_INT (0x4401fff0));
11503 emit_insn (gen_ic_invalidate_line (tramp));
11504 return;
11506 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11507 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11509 tramp_templ = gen_datalabel_ref (tramp_templ);
11510 dst = tramp_mem;
11511 src = gen_const_mem (BLKmode, tramp_templ);
11512 set_mem_align (dst, 256);
11513 set_mem_align (src, 64);
11514 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11516 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11517 emit_move_insn (adjust_address (tramp_mem, Pmode,
11518 fixed_len + GET_MODE_SIZE (Pmode)),
11519 cxt);
11520 emit_insn (gen_ic_invalidate_line (tramp));
11521 return;
11523 else if (TARGET_SHMEDIA)
11525 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11526 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11527 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11528 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11529 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11530 rotated 10 right, and higher 16 bit of every 32 selected. */
11531 rtx movishori
11532 = force_reg (V2HImode, (simplify_gen_subreg
11533 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11534 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11535 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11537 fnaddr = force_reg (SImode, fnaddr);
11538 cxt = force_reg (SImode, cxt);
11539 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11540 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11541 movishori));
11542 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11543 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11544 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11545 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11546 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11547 gen_rtx_SUBREG (V2HImode, cxt, 0),
11548 movishori));
11549 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11550 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11551 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11552 if (TARGET_LITTLE_ENDIAN)
11554 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11555 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11557 else
11559 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11560 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11562 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11563 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11564 emit_insn (gen_ic_invalidate_line (tramp));
11565 return;
11567 else if (TARGET_SHCOMPACT)
11569 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11570 return;
11572 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11573 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11574 SImode));
11575 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11576 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11577 SImode));
11578 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11579 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11580 if (TARGET_HARD_SH4 || TARGET_SH5)
11582 if (!TARGET_INLINE_IC_INVALIDATE
11583 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11584 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11585 FUNCTION_ORDINARY),
11586 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11587 else
11588 emit_insn (gen_ic_invalidate_line (tramp));
11592 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11593 static rtx
11594 sh_trampoline_adjust_address (rtx tramp)
11596 if (TARGET_SHMEDIA)
11597 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11598 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11599 return tramp;
11602 /* FIXME: This is overly conservative. A SHcompact function that
11603 receives arguments ``by reference'' will have them stored in its
11604 own stack frame, so it must not pass pointers or references to
11605 these arguments to other functions by means of sibling calls. */
11606 /* If PIC, we cannot make sibling calls to global functions
11607 because the PLT requires r12 to be live. */
11608 static bool
11609 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11611 return (1
11612 && (! TARGET_SHCOMPACT
11613 || crtl->args.info.stack_regs == 0)
11614 && ! sh_cfun_interrupt_handler_p ()
11615 && (! flag_pic
11616 || (decl && ! TREE_PUBLIC (decl))
11617 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11620 /* Machine specific built-in functions. */
11622 struct builtin_description
11624 bool (* const is_enabled) (void);
11625 const enum insn_code icode;
11626 const char *const name;
11627 int signature;
11628 tree fndecl;
11631 static bool
11632 shmedia_builtin_p (void)
11634 return TARGET_SHMEDIA;
11637 /* This function can be used if there are any built-ins that are not for
11638 SHmedia. It's commented out to avoid the defined-but-unused warning.
11639 static bool
11640 sh1_builtin_p (void)
11642 return TARGET_SH1;
11646 /* Describe the number and signedness of arguments; arg[0] == result
11647 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11648 /* 9: 64-bit pointer, 10: 32-bit pointer */
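/* Added worked example (hedged reading of the encoding above): the
   SH_BLTIN_SH_HI entry below is { 4, 4, 1 }, i.e. the result and the first
   argument take whatever mode the insn pattern dictates ("don't care") while
   the shift-count argument is unsigned; a 0 in a slot means no argument, or
   a void result when it is slot 0.  */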
11649 static const char signature_args[][4] =
11651 #define SH_BLTIN_V2SI2 0
11652 { 4, 4 },
11653 #define SH_BLTIN_V4HI2 1
11654 { 4, 4 },
11655 #define SH_BLTIN_V2SI3 2
11656 { 4, 4, 4 },
11657 #define SH_BLTIN_V4HI3 3
11658 { 4, 4, 4 },
11659 #define SH_BLTIN_V8QI3 4
11660 { 4, 4, 4 },
11661 #define SH_BLTIN_MAC_HISI 5
11662 { 1, 4, 4, 1 },
11663 #define SH_BLTIN_SH_HI 6
11664 { 4, 4, 1 },
11665 #define SH_BLTIN_SH_SI 7
11666 { 4, 4, 1 },
11667 #define SH_BLTIN_V4HI2V2SI 8
11668 { 4, 4, 4 },
11669 #define SH_BLTIN_V4HI2V8QI 9
11670 { 4, 4, 4 },
11671 #define SH_BLTIN_SISF 10
11672 { 4, 2 },
11673 #define SH_BLTIN_LDUA_L 11
11674 { 2, 10 },
11675 #define SH_BLTIN_LDUA_Q 12
11676 { 1, 10 },
11677 #define SH_BLTIN_STUA_L 13
11678 { 0, 10, 2 },
11679 #define SH_BLTIN_STUA_Q 14
11680 { 0, 10, 1 },
11681 #define SH_BLTIN_LDUA_L64 15
11682 { 2, 9 },
11683 #define SH_BLTIN_LDUA_Q64 16
11684 { 1, 9 },
11685 #define SH_BLTIN_STUA_L64 17
11686 { 0, 9, 2 },
11687 #define SH_BLTIN_STUA_Q64 18
11688 { 0, 9, 1 },
11689 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11690 #define SH_BLTIN_2 19
11691 #define SH_BLTIN_SU 19
11692 { 1, 2 },
11693 #define SH_BLTIN_3 20
11694 #define SH_BLTIN_SUS 20
11695 { 2, 2, 1 },
11696 #define SH_BLTIN_PSSV 21
11697 { 0, 8, 2, 2 },
11698 #define SH_BLTIN_XXUU 22
11699 #define SH_BLTIN_UUUU 22
11700 { 1, 1, 1, 1 },
11701 #define SH_BLTIN_PV 23
11702 { 0, 8 },
11703 #define SH_BLTIN_VP 24
11704 { 8, 0 },
11706 /* mcmv: operands considered unsigned. */
11707 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11708 /* mperm: control value considered unsigned int. */
11709 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11710 /* mshards_q: returns signed short. */
11711 /* nsb: takes long long arg, returns unsigned char. */
11712 static struct builtin_description bdesc[] =
11714 { shmedia_builtin_p,
11715 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11716 { shmedia_builtin_p,
11717 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11718 { shmedia_builtin_p,
11719 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11720 { shmedia_builtin_p,
11721 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11722 { shmedia_builtin_p,
11723 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11724 { shmedia_builtin_p,
11725 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11726 { shmedia_builtin_p,
11727 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11728 { shmedia_builtin_p,
11729 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11730 { shmedia_builtin_p,
11731 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11732 { shmedia_builtin_p,
11733 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11734 { shmedia_builtin_p,
11735 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11736 { shmedia_builtin_p,
11737 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11738 { shmedia_builtin_p,
11739 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11740 { shmedia_builtin_p,
11741 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11742 { shmedia_builtin_p,
11743 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11744 { shmedia_builtin_p,
11745 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11746 { shmedia_builtin_p,
11747 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11748 { shmedia_builtin_p,
11749 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11750 { shmedia_builtin_p,
11751 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11752 { shmedia_builtin_p,
11753 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11754 { shmedia_builtin_p,
11755 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11756 { shmedia_builtin_p,
11757 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11758 { shmedia_builtin_p,
11759 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11760 { shmedia_builtin_p,
11761 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11762 { shmedia_builtin_p,
11763 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11764 { shmedia_builtin_p,
11765 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11766 { shmedia_builtin_p,
11767 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11768 { shmedia_builtin_p,
11769 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11770 { shmedia_builtin_p,
11771 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11772 { shmedia_builtin_p,
11773 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11774 { shmedia_builtin_p,
11775 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11776 { shmedia_builtin_p,
11777 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11778 { shmedia_builtin_p,
11779 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11780 { shmedia_builtin_p,
11781 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11782 { shmedia_builtin_p,
11783 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11784 { shmedia_builtin_p,
11785 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11786 { shmedia_builtin_p,
11787 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11788 { shmedia_builtin_p,
11789 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11790 { shmedia_builtin_p,
11791 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11792 { shmedia_builtin_p,
11793 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11794 { shmedia_builtin_p,
11795 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11796 { shmedia_builtin_p,
11797 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11798 { shmedia_builtin_p,
11799 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11800 { shmedia_builtin_p,
11801 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11802 { shmedia_builtin_p,
11803 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11804 { shmedia_builtin_p,
11805 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11806 { shmedia_builtin_p,
11807 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11808 { shmedia_builtin_p,
11809 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11810 { shmedia_builtin_p,
11811 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11812 { shmedia_builtin_p,
11813 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11814 { shmedia_builtin_p,
11815 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11816 { shmedia_builtin_p,
11817 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11818 { shmedia_builtin_p,
11819 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11820 { shmedia_builtin_p,
11821 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11822 { shmedia_builtin_p,
11823 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11824 { shmedia_builtin_p,
11825 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11826 { shmedia_builtin_p,
11827 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11828 { shmedia_builtin_p,
11829 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11830 { shmedia_builtin_p,
11831 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11832 { shmedia_builtin_p,
11833 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11834 { shmedia_builtin_p,
11835 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11836 { shmedia_builtin_p,
11837 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11838 { shmedia_builtin_p,
11839 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11840 { shmedia_builtin_p,
11841 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11842 { shmedia_builtin_p,
11843 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11844 { shmedia_builtin_p,
11845 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11846 { shmedia_builtin_p,
11847 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11848 { shmedia_builtin_p,
11849 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11850 { shmedia_builtin_p,
11851 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11852 { shmedia_builtin_p,
11853 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11854 { shmedia_builtin_p,
11855 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11856 { shmedia_builtin_p,
11857 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11858 { shmedia_builtin_p,
11859 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11860 { shmedia_builtin_p,
11861 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11862 { shmedia_builtin_p,
11863 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11864 { shmedia_builtin_p,
11865 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11866 { shmedia_builtin_p,
11867 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11868 { shmedia_builtin_p,
11869 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11870 { shmedia_builtin_p,
11871 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11872 { shmedia_builtin_p,
11873 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11874 { shmedia_builtin_p,
11875 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11876 { shmedia_builtin_p,
11877 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11878 { shmedia_builtin_p,
11879 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11882 static void
11883 sh_init_builtins (void)
11885 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11886 memset (shared, 0, sizeof shared);
11888 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11890 builtin_description* d = &bdesc[di];
11892 if (!d->is_enabled ())
11893 continue;
11895 tree type, arg_type = NULL_TREE;
11896 int signature = d->signature;
11898 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11899 type = shared[signature];
11900 else
11902 int has_result = signature_args[signature][0] != 0;
11903 tree args[3];
11905 if ((signature_args[signature][1] & 8)
11906 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11907 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11908 continue;
11909 if (! TARGET_FPU_ANY
11910 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11911 continue;
11912 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11913 args[i] = NULL_TREE;
11914 for (int i = 3; ; i--)
11916 int arg = signature_args[signature][i];
11917 int opno = i - 1 + has_result;
11919 if (arg & 8)
11920 arg_type = ptr_type_node;
11921 else if (arg)
11922 arg_type = (*lang_hooks.types.type_for_mode)
11923 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11924 else if (i)
11925 continue;
11926 else
11927 arg_type = void_type_node;
11928 if (i == 0)
11929 break;
11930 args[i-1] = arg_type;
11932 type = build_function_type_list (arg_type, args[0], args[1],
11933 args[2], NULL_TREE);
11934 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11935 shared[signature] = type;
11937 d->fndecl =
11938 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11939 NULL, NULL_TREE);
11943 /* Implements target hook vector_mode_supported_p. */
11944 bool
11945 sh_vector_mode_supported_p (enum machine_mode mode)
11947 if (TARGET_FPU_ANY
11948 && ((mode == V2SFmode)
11949 || (mode == V4SFmode)
11950 || (mode == V16SFmode)))
11951 return true;
11953 else if (TARGET_SHMEDIA
11954 && ((mode == V8QImode)
11955 || (mode == V2HImode)
11956 || (mode == V4HImode)
11957 || (mode == V2SImode)))
11958 return true;
11960 return false;
11963 bool
11964 sh_frame_pointer_required (void)
11966 /* If needed override this in other tm.h files to cope with various OS
11967 lossage requiring a frame pointer. */
11968 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11969 return true;
11971 if (crtl->profile)
11972 return true;
11974 return false;
11977 /* Implements target hook dwarf_calling_convention. Return an enum
11978 of dwarf_calling_convention. */
11980 sh_dwarf_calling_convention (const_tree func)
11982 if (sh_attr_renesas_p (func))
11983 return DW_CC_GNU_renesas_sh;
11985 return DW_CC_normal;
11988 /* Returns the sh builtin decl for CODE. */
11989 static tree
11990 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11992 if (code >= ARRAY_SIZE (bdesc))
11993 return error_mark_node;
11995 if (!bdesc[code].is_enabled ())
11996 return error_mark_node;
11998 return bdesc[code].fndecl;
12001 /* Expand an expression EXP that calls a built-in function,
12002 with result going to TARGET if that's convenient
12003 (and in mode MODE if that's convenient).
12004 SUBTARGET may be used as the target for computing one of EXP's operands.
12005 IGNORE is nonzero if the value is to be ignored. */
12006 static rtx
12007 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12008 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12010 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12011 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12012 const struct builtin_description *d = &bdesc[fcode];
12013 enum insn_code icode = d->icode;
12014 int signature = d->signature;
12015 int nop = 0;
12016 rtx op[4];
12018 if (signature_args[signature][0])
12020 if (ignore)
12021 return NULL_RTX;
12023 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12024 if (! target || GET_MODE (target) != tmode
12025 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12026 target = gen_reg_rtx (tmode);
12027 op[nop++] = target;
12029 else
12030 target = NULL_RTX;
12032 for (int i = 1; i <= 3; i++, nop++)
12034 tree arg;
12035 enum machine_mode opmode, argmode;
12036 tree optype;
12038 if (! signature_args[signature][i])
12039 break;
12040 arg = CALL_EXPR_ARG (exp, i - 1);
12041 if (arg == error_mark_node)
12042 return const0_rtx;
12043 if (signature_args[signature][i] & 8)
12045 opmode = ptr_mode;
12046 optype = ptr_type_node;
12048 else
12050 opmode = insn_data[icode].operand[nop].mode;
12051 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12053 argmode = TYPE_MODE (TREE_TYPE (arg));
12054 if (argmode != opmode)
12055 arg = build1 (NOP_EXPR, optype, arg);
12056 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12057 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12058 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12061 rtx pat = NULL_RTX;
12063 switch (nop)
12065 case 1:
12066 pat = (*insn_data[d->icode].genfun) (op[0]);
12067 break;
12068 case 2:
12069 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12070 break;
12071 case 3:
12072 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12073 break;
12074 case 4:
12075 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12076 break;
12077 default:
12078 gcc_unreachable ();
12080 if (! pat)
12081 return NULL_RTX;
12082 emit_insn (pat);
12083 return target;
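/* Added usage sketch (hedged, illustrative only; the v2si typedef is an
   assumption, not something defined in this file): with SHmedia builtins
   enabled, user code along the lines of
     v2si a, b;
     v2si c = __builtin_absv2si2 (__builtin_addv2si3 (a, b));
   is expanded here: sh_expand_builtin looks up the bdesc[] entry for each
   call, checks every operand against the insn predicates, and emits the
   named patterns (CODE_FOR_addv2si3, then CODE_FOR_absv2si2).  */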
12086 void
12087 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12089 rtx sel0 = const0_rtx;
12090 rtx sel1 = const1_rtx;
12091 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12092 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12094 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12095 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12098 void
12099 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12101 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12103 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12104 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12107 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12108 We can allow any mode in any general register. The special registers
12109 only allow SImode. Don't allow any mode in the PR.
12111 We cannot hold DCmode values in the XD registers because alter_reg
12112 handles subregs of them incorrectly. We could work around this by
12113 spacing the XD registers like the DR registers, but this would require
12114 additional memory in every compilation to hold larger register vectors.
12115 We could hold SFmode / SCmode values in XD registers, but that
12116 would require a tertiary reload when reloading from / to memory,
12117 and a secondary reload to reload from / to general regs; that
12118 seems to be a losing proposition.
12120 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12121 it won't be ferried through GP registers first. */
12122 bool
12123 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
12125 if (SPECIAL_REGISTER_P (regno))
12126 return mode == SImode;
12128 if (regno == FPUL_REG)
12129 return (mode == SImode || mode == SFmode);
12131 if (FP_REGISTER_P (regno) && mode == SFmode)
12132 return true;
12134 if (mode == V2SFmode)
12136 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12137 || GENERAL_REGISTER_P (regno)))
12138 return true;
12139 else
12140 return false;
12143 if (mode == V4SFmode)
12145 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12146 || GENERAL_REGISTER_P (regno))
12147 return true;
12148 else
12149 return false;
12152 if (mode == V16SFmode)
12154 if (TARGET_SHMEDIA)
12156 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12157 return true;
12158 else
12159 return false;
12161 else
12162 return regno == FIRST_XD_REG;
12165 if (FP_REGISTER_P (regno))
12167 if (mode == SFmode
12168 || mode == SImode
12169 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12170 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12171 || mode == DCmode
12172 || (TARGET_SHMEDIA
12173 && (mode == DFmode || mode == DImode
12174 || mode == V2SFmode || mode == TImode)))
12175 && ((regno - FIRST_FP_REG) & 1) == 0)
12176 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12177 && ((regno - FIRST_FP_REG) & 3) == 0))
12178 return true;
12179 else
12180 return false;
12183 if (XD_REGISTER_P (regno))
12184 return mode == DFmode;
12186 if (TARGET_REGISTER_P (regno))
12187 return (mode == DImode || mode == SImode || mode == PDImode);
12189 if (regno == PR_REG)
12190 return mode == SImode;
12192 if (regno == FPSCR_REG)
12193 return mode == PSImode;
12195 /* FIXME. This works around PR target/37633 for -O0. */
12196 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12198 unsigned int n = GET_MODE_SIZE (mode) / 8;
12200 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12201 && regno <= FIRST_GENERAL_REG + 14)
12202 return false;
12205 return true;
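/* Added summary examples (hedged reading of the checks above): PR_REG is
   accepted only in SImode, FPSCR_REG only in PSImode, an XD register only in
   DFmode, and DFmode fits in an FP register only when (regno - FIRST_FP_REG)
   is even.  */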
12208 /* Return true if a mode change from FROM to TO is invalid for registers
12209 in class RCLASS. */
12210 bool
12211 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12212 enum reg_class rclass)
12214 /* We want to enable the use of SUBREGs as a means to
12215 VEC_SELECT a single element of a vector. */
12217 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12218 This can be problematic when SFmode vector subregs need to be accessed
12219 on the stack with displacement addressing, as it happens with -O0.
12220 Thus we disallow the mode change for -O0. */
12221 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12222 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12224 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12226 if (TARGET_LITTLE_ENDIAN)
12228 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12229 return reg_classes_intersect_p (DF_REGS, rclass);
12231 else
12233 if (GET_MODE_SIZE (from) < 8)
12234 return reg_classes_intersect_p (DF_REGS, rclass);
12237 return false;
12240 /* Return true if registers in machine mode MODE will likely be
12241 allocated to registers in small register classes. */
12242 bool
12243 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12245 return (! TARGET_SHMEDIA);
12248 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12249 that label is used. */
12250 void
12251 sh_mark_label (rtx address, int nuses)
12253 if (GOTOFF_P (address))
12255 /* Extract the label or symbol. */
12256 address = XEXP (address, 0);
12257 if (GET_CODE (address) == PLUS)
12258 address = XEXP (address, 0);
12259 address = XVECEXP (address, 0, 0);
12261 if (GET_CODE (address) == LABEL_REF
12262 && LABEL_P (XEXP (address, 0)))
12263 LABEL_NUSES (XEXP (address, 0)) += nuses;
12266 /* Compute extra cost of moving data between one register class
12267 and another.
12269 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12270 uses this information. Hence, the general register <-> floating point
12271 register information here is not used for SFmode. */
12272 static int
12273 sh_register_move_cost (enum machine_mode mode,
12274 reg_class_t srcclass, reg_class_t dstclass)
12276 if (dstclass == T_REGS || dstclass == PR_REGS)
12277 return 10;
12279 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12280 return 4;
12282 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12283 && REGCLASS_HAS_FP_REG (srcclass)
12284 && REGCLASS_HAS_FP_REG (dstclass))
12285 return 4;
12287 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12288 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12290 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12291 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12292 return 9;
12294 if ((REGCLASS_HAS_FP_REG (dstclass)
12295 && REGCLASS_HAS_GENERAL_REG (srcclass))
12296 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12297 && REGCLASS_HAS_FP_REG (srcclass)))
12299 /* Discourage trying to use fp regs for a pointer. This also
12300 discourages fp regs with SImode because Pmode is an alias
12301 of SImode on this target. See PR target/48596. */
12302 int addend = (mode == Pmode) ? 40 : 0;
12304 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12305 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12308 if ((dstclass == FPUL_REGS
12309 && REGCLASS_HAS_GENERAL_REG (srcclass))
12310 || (srcclass == FPUL_REGS
12311 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12312 return 5;
12314 if ((dstclass == FPUL_REGS
12315 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12316 || (srcclass == FPUL_REGS
12317 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12318 return 7;
12320 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12321 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12322 return 20;
12324 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12325 if (TARGET_SHMEDIA
12326 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12328 if (sh_gettrcost >= 0)
12329 return sh_gettrcost;
12330 else if (!TARGET_PT_FIXED)
12331 return 100;
12334 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12335 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12336 return 4;
12338 if (TARGET_SHMEDIA
12339 || (TARGET_FMOVD
12340 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12341 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12342 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12344 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12347 static rtx
12348 emit_load_ptr (rtx reg, rtx addr)
12350 rtx mem = gen_const_mem (ptr_mode, addr);
12352 if (Pmode != ptr_mode)
12353 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12354 return emit_move_insn (reg, mem);
12357 static void
12358 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12359 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12360 tree function)
12362 CUMULATIVE_ARGS cum;
12363 int structure_value_byref = 0;
12364 rtx this_rtx, this_value, sibcall, insns, funexp;
12365 tree funtype = TREE_TYPE (function);
12366 int simple_add = CONST_OK_FOR_ADD (delta);
12367 int did_load = 0;
12368 rtx scratch0, scratch1, scratch2;
12369 unsigned i;
12371 reload_completed = 1;
12372 epilogue_completed = 1;
12373 crtl->uses_only_leaf_regs = 1;
12375 emit_note (NOTE_INSN_PROLOGUE_END);
12377 /* Find the "this" pointer. We have such a wide range of ABIs for the
12378 SH that it's best to do this completely machine independently.
12379 "this" is passed as first argument, unless a structure return pointer
12380 comes first, in which case "this" comes second. */
12381 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12382 #ifndef PCC_STATIC_STRUCT_RETURN
12383 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12384 structure_value_byref = 1;
12385 #endif /* not PCC_STATIC_STRUCT_RETURN */
12386 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12388 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12390 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12392 this_rtx
12393 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12395 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12396 static chain pointer (even if you can't have nested virtual functions
12397 right now, someone might implement them sometime), and the rest of the
12398 registers are used for argument passing, are callee-saved, or reserved. */
12399 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12400 -ffixed-reg has been used. */
12401 if (! call_used_regs[0] || fixed_regs[0])
12402 error ("r0 needs to be available as a call-clobbered register");
12403 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12404 if (! TARGET_SH5)
12406 if (call_used_regs[1] && ! fixed_regs[1])
12407 scratch1 = gen_rtx_REG (ptr_mode, 1);
12408 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12409 that points to where struct values are to be returned. */
12410 if (call_used_regs[3] && ! fixed_regs[3])
12411 scratch2 = gen_rtx_REG (Pmode, 3);
12413 else if (TARGET_SHMEDIA)
12415 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12416 if (i != REGNO (scratch0) &&
12417 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12419 scratch1 = gen_rtx_REG (ptr_mode, i);
12420 break;
12422 if (scratch1 == scratch0)
12423 error ("need a second call-clobbered general purpose register");
12424 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12425 if (call_used_regs[i] && ! fixed_regs[i])
12427 scratch2 = gen_rtx_REG (Pmode, i);
12428 break;
12430 if (scratch2 == scratch0)
12431 error ("need a call-clobbered target register");
12434 this_value = plus_constant (Pmode, this_rtx, delta);
12435 if (vcall_offset
12436 && (simple_add || scratch0 != scratch1)
12437 && strict_memory_address_p (ptr_mode, this_value))
12439 emit_load_ptr (scratch0, this_value);
12440 did_load = 1;
12443 if (!delta)
12444 ; /* Do nothing. */
12445 else if (simple_add)
12446 emit_move_insn (this_rtx, this_value);
12447 else
12449 emit_move_insn (scratch1, GEN_INT (delta));
12450 emit_insn (gen_add2_insn (this_rtx, scratch1));
12453 if (vcall_offset)
12455 rtx offset_addr;
12457 if (!did_load)
12458 emit_load_ptr (scratch0, this_rtx);
12460 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12461 if (strict_memory_address_p (ptr_mode, offset_addr))
12462 ; /* Do nothing. */
12463 else if (! TARGET_SH5 && scratch0 != scratch1)
12465 /* scratch0 != scratch1, and we have indexed loads. Get a better
12466 schedule by loading the offset into r1 and using an indexed
12467 load - then the load of r1 can issue before the load from
12468 (this_rtx + delta) finishes. */
12469 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12470 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12472 else if (CONST_OK_FOR_ADD (vcall_offset))
12474 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12475 offset_addr = scratch0;
12477 else if (scratch0 != scratch1)
12479 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12480 emit_insn (gen_add2_insn (scratch0, scratch1));
12481 offset_addr = scratch0;
12483 else
12484 gcc_unreachable (); /* FIXME */
12485 emit_load_ptr (scratch0, offset_addr);
12487 if (Pmode != ptr_mode)
12488 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12489 emit_insn (gen_add2_insn (this_rtx, scratch0));
12492 /* Generate a tail call to the target function. */
12493 if (! TREE_USED (function))
12495 assemble_external (function);
12496 TREE_USED (function) = 1;
12498 funexp = XEXP (DECL_RTL (function), 0);
12499 /* If the function is overridden, so is the thunk, hence we don't
12500 need GOT addressing even if this is a public symbol. */
12501 #if 0
12502 if (TARGET_SH1 && ! flag_weak)
12503 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12504 else
12505 #endif
12506 if (TARGET_SH2 && flag_pic)
12508 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12509 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12511 else
12513 if (TARGET_SHMEDIA && flag_pic)
12515 funexp = gen_sym2PIC (funexp);
12516 PUT_MODE (funexp, Pmode);
12518 emit_move_insn (scratch2, funexp);
12519 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12520 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12522 sibcall = emit_call_insn (sibcall);
12523 SIBLING_CALL_P (sibcall) = 1;
12524 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12525 emit_barrier ();
12527 /* Run just enough of rest_of_compilation to do scheduling and get
12528 the insns emitted. Note that use_thunk calls
12529 assemble_start_function and assemble_end_function. */
12531 insns = get_insns ();
12533 if (optimize > 0)
12535 if (! cfun->cfg)
12536 init_flow (cfun);
12537 split_all_insns_noflow ();
12540 sh_reorg ();
12541 shorten_branches (insns);
12542 final_start_function (insns, file, 1);
12543 final (insns, file, 1);
12544 final_end_function ();
12546 reload_completed = 0;
12547 epilogue_completed = 0;
12551 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12553 rtx sym;
12555 /* If this is not an ordinary function, the name usually comes from a
12556 string literal or an sprintf buffer. Make sure we use the same
12557 string consistently, so that cse will be able to unify address loads. */
12558 if (kind != FUNCTION_ORDINARY)
12559 name = IDENTIFIER_POINTER (get_identifier (name));
12560 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12561 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12562 if (flag_pic)
12563 switch (kind)
12565 case FUNCTION_ORDINARY:
12566 break;
12567 case SFUNC_GOT:
12569 rtx reg = target ? target : gen_reg_rtx (Pmode);
12571 emit_insn (gen_symGOT2reg (reg, sym));
12572 sym = reg;
12573 break;
12575 case SFUNC_STATIC:
12577 /* ??? To allow cse to work, we use GOTOFF relocations.
12578 We could add combiner patterns to transform this into
12579 straight pc-relative calls with sym2PIC / bsrf when
12580 label load and function call are still 1:1 and in the
12581 same basic block during combine. */
12582 rtx reg = target ? target : gen_reg_rtx (Pmode);
12584 emit_insn (gen_symGOTOFF2reg (reg, sym));
12585 sym = reg;
12586 break;
12589 if (target && sym != target)
12591 emit_move_insn (target, sym);
12592 return target;
12594 return sym;
12597 /* Find the number of a general purpose register in S. */
12598 static int
12599 scavenge_reg (HARD_REG_SET *s)
12601 int r;
12602 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12603 if (TEST_HARD_REG_BIT (*s, r))
12604 return r;
12605 return -1;
12609 sh_get_pr_initial_val (void)
12611 rtx val;
12613 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12614 PR register on SHcompact, because it might be clobbered by the prologue.
12615 We check first if that is known to be the case. */
12616 if (TARGET_SHCOMPACT
12617 && ((crtl->args.info.call_cookie
12618 & ~ CALL_COOKIE_RET_TRAMP (1))
12619 || crtl->saves_all_registers))
12620 return gen_frame_mem (SImode, return_address_pointer_rtx);
12622 /* If we haven't finished rtl generation, there might be a nonlocal label
12623 that we haven't seen yet.
12624 ??? get_hard_reg_initial_val fails if it is called after register
12625 allocation has started, unless it has been called before for the
12626 same register. And even then, we end up in trouble if we didn't use
12627 the register in the same basic block before. So call
12628 get_hard_reg_initial_val now and wrap it in an unspec if we might
12629 need to replace it. */
12630 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12631 combine can put the pseudo returned by get_hard_reg_initial_val into
12632 instructions that need a general purpose register, which will fail to
12633 be recognized when the pseudo becomes allocated to PR. */
12635 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12636 if (TARGET_SH1)
12637 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12638 return val;
12641 bool
12642 sh_expand_t_scc (rtx operands[])
12644 enum rtx_code code = GET_CODE (operands[1]);
12645 rtx target = operands[0];
12646 rtx op0 = operands[2];
12647 rtx op1 = operands[3];
12648 rtx result = target;
12649 HOST_WIDE_INT val;
12651 if (!REG_P (op0) || REGNO (op0) != T_REG
12652 || !CONST_INT_P (op1))
12653 return false;
12654 if (!REG_P (result))
12655 result = gen_reg_rtx (SImode);
12656 val = INTVAL (op1);
12657 if ((code == EQ && val == 1) || (code == NE && val == 0))
12658 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12659 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12660 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12661 else if (code == EQ || code == NE)
12662 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12663 else
12664 return false;
12665 if (result != target)
12666 emit_move_insn (target, result);
12667 return true;
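/* Added illustration (hedged): per the cases above, (eq T 1) and (ne T 0)
   become a single movt, (eq T 0) and (ne T 1) become movnegt, and any other
   EQ/NE against a constant degenerates to loading the constant answer
   (0 for EQ, 1 for NE).  */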
12670 /* INSN is an sfunc; return the rtx that describes the address used. */
12671 static rtx
12672 extract_sfunc_addr (rtx insn)
12674 rtx pattern, part = NULL_RTX;
12675 int len, i;
12677 pattern = PATTERN (insn);
12678 len = XVECLEN (pattern, 0);
12679 for (i = 0; i < len; i++)
12681 part = XVECEXP (pattern, 0, i);
12682 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12683 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12684 return XEXP (part, 0);
12686 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12687 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12690 /* Verify that the register in use_sfunc_addr still agrees with the address
12691 used in the sfunc. This prevents fill_slots_from_thread from changing
12692 use_sfunc_addr.
12693 INSN is the use_sfunc_addr instruction, and REG is the register it
12694 guards. */
12695 bool
12696 check_use_sfunc_addr (rtx insn, rtx reg)
12698 /* Search for the sfunc. It should really come right after INSN. */
12699 while ((insn = NEXT_INSN (insn)))
12701 if (LABEL_P (insn) || JUMP_P (insn))
12702 break;
12703 if (! INSN_P (insn))
12704 continue;
12706 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12707 insn = XVECEXP (PATTERN (insn), 0, 0);
12708 if (GET_CODE (PATTERN (insn)) != PARALLEL
12709 || get_attr_type (insn) != TYPE_SFUNC)
12710 continue;
12711 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12713 gcc_unreachable ();
12716 /* This function returns a constant rtx that represents 2**15 / pi in
12717 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12718 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12719 static GTY(()) rtx sh_fsca_sf2int_rtx;
12722 sh_fsca_sf2int (void)
12724 if (! sh_fsca_sf2int_rtx)
12726 REAL_VALUE_TYPE rv;
12728 real_from_string (&rv, "10430.378350470453");
12729 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12732 return sh_fsca_sf2int_rtx;
12735 /* This function returns a constant rtx that represents pi / 2**15 in
12736 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction of a
12737 full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
12739 static GTY(()) rtx sh_fsca_int2sf_rtx;
12742 sh_fsca_int2sf (void)
12744 if (! sh_fsca_int2sf_rtx)
12746 REAL_VALUE_TYPE rv;
12748 real_from_string (&rv, "9.587379924285257e-5");
12749 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12752 return sh_fsca_int2sf_rtx;
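/* Added numeric check (hedged): 2**15 / pi is about 10430.378350470453 and
   pi / 2**15 is about 9.587379924285257e-5, matching the string constants
   above; multiplying an angle of 2*pi radians by the former gives 0x10000,
   and multiplying 0x10000 by the latter gives back 2*pi.  */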
12755 /* Initialize the CUMULATIVE_ARGS structure. */
12756 void
12757 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12758 tree fntype,
12759 rtx libname ATTRIBUTE_UNUSED,
12760 tree fndecl,
12761 signed int n_named_args,
12762 enum machine_mode mode)
12764 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12765 pcum->free_single_fp_reg = 0;
12766 pcum->stack_regs = 0;
12767 pcum->byref_regs = 0;
12768 pcum->byref = 0;
12769 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12771 /* XXX - Should we check TARGET_HITACHI here ??? */
12772 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12774 if (fntype)
12776 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12777 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12778 pcum->prototype_p = prototype_p (fntype);
12779 pcum->arg_count [(int) SH_ARG_INT]
12780 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12782 pcum->call_cookie
12783 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12784 && pcum->arg_count [(int) SH_ARG_INT] == 0
12785 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12786 ? int_size_in_bytes (TREE_TYPE (fntype))
12787 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12788 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12789 == FIRST_RET_REG));
12791 else
12793 pcum->arg_count [(int) SH_ARG_INT] = 0;
12794 pcum->prototype_p = FALSE;
12795 if (mode != VOIDmode)
12797 pcum->call_cookie =
12798 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12799 && GET_MODE_SIZE (mode) > 4
12800 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12802 /* If the default ABI is the Renesas ABI then all library
12803 calls must assume that the library will be using the
12804 Renesas ABI. So if the function would return its result
12805 in memory then we must force the address of this memory
12806 block onto the stack. Ideally we would like to call
12807 targetm.calls.return_in_memory() here but we do not have
12808 the TYPE or the FNDECL available so we synthesize the
12809 contents of that function as best we can. */
12810 pcum->force_mem =
12811 (TARGET_DEFAULT & MASK_HITACHI)
12812 && (mode == BLKmode
12813 || (GET_MODE_SIZE (mode) > 4
12814 && !(mode == DFmode
12815 && TARGET_FPU_DOUBLE)));
12817 else
12819 pcum->call_cookie = 0;
12820 pcum->force_mem = FALSE;
12825 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12826 not descend into CONST_DOUBLEs when doing the replacement.
12828 Note that copying is not done so X must not be shared unless all copies
12829 are to be modified.
12831 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12832 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12833 replacements[n*2+1] - and that we take mode changes into account.
12835 If a replacement is ambiguous, return NULL_RTX.
12837 If MODIFY is zero, don't modify any rtl in place,
12838 just return zero or nonzero for failure / success. */
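/* Added layout note (hedged): under the convention described above, a call
   with n_replacements == 2 receives
     replacements[] = { FROM(0), TO(0), FROM(1), TO(1) }
   i.e. each even index names the rtx to look for and the following odd
   index names the rtx to substitute for it.  */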
12840 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12842 int i, j;
12843 const char *fmt;
12845 /* The following prevents loops from occurring when we change a MEM in a
12846 CONST_DOUBLE into the same CONST_DOUBLE. */
12847 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12848 return x;
12850 for (i = n_replacements - 1; i >= 0 ; i--)
12851 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12852 return replacements[i*2+1];
12854 /* Allow this function to make replacements in EXPR_LISTs. */
12855 if (x == NULL_RTX)
12856 return NULL_RTX;
12858 if (GET_CODE (x) == SUBREG)
12860 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12861 n_replacements, modify);
12863 if (CONST_INT_P (new_rtx))
12865 x = simplify_subreg (GET_MODE (x), new_rtx,
12866 GET_MODE (SUBREG_REG (x)),
12867 SUBREG_BYTE (x));
12868 if (! x)
12869 abort ();
12871 else if (modify)
12872 SUBREG_REG (x) = new_rtx;
12874 return x;
12876 else if (REG_P (x))
12878 unsigned regno = REGNO (x);
12879 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12880 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12881 rtx result = NULL_RTX;
12883 for (i = n_replacements - 1; i >= 0; i--)
12885 rtx from = replacements[i*2];
12886 rtx to = replacements[i*2+1];
12887 unsigned from_regno, from_nregs, to_regno, new_regno;
12889 if (!REG_P (from))
12890 continue;
12891 from_regno = REGNO (from);
12892 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12893 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12894 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12896 if (regno < from_regno
12897 || regno + nregs > from_regno + nregs
12898 || !REG_P (to)
12899 || result)
12900 return NULL_RTX;
12901 to_regno = REGNO (to);
12902 if (to_regno < FIRST_PSEUDO_REGISTER)
12904 new_regno = regno + to_regno - from_regno;
12905 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12906 != nregs)
12907 return NULL_RTX;
12908 result = gen_rtx_REG (GET_MODE (x), new_regno);
12910 else if (GET_MODE (x) <= GET_MODE (to))
12911 result = gen_lowpart_common (GET_MODE (x), to);
12912 else
12913 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12916 return result ? result : x;
12918 else if (GET_CODE (x) == ZERO_EXTEND)
12920 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12921 n_replacements, modify);
12923 if (CONST_INT_P (new_rtx))
12925 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12926 new_rtx, GET_MODE (XEXP (x, 0)));
12927 if (! x)
12928 abort ();
12930 else if (modify)
12931 XEXP (x, 0) = new_rtx;
12933 return x;
12936 fmt = GET_RTX_FORMAT (GET_CODE (x));
12937 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12939 rtx new_rtx;
12941 if (fmt[i] == 'e')
12943 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12944 n_replacements, modify);
12945 if (!new_rtx)
12946 return NULL_RTX;
12947 if (modify)
12948 XEXP (x, i) = new_rtx;
12950 else if (fmt[i] == 'E')
12951 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12953 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12954 n_replacements, modify);
12955 if (!new_rtx)
12956 return NULL_RTX;
12957 if (modify)
12958 XVECEXP (x, i, j) = new_rtx;
12962 return x;
12966 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12968 enum rtx_code code = TRUNCATE;
12970 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12972 rtx inner = XEXP (x, 0);
12973 enum machine_mode inner_mode = GET_MODE (inner);
12975 if (inner_mode == mode)
12976 return inner;
12977 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12978 x = inner;
12979 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12980 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12982 code = GET_CODE (x);
12983 x = inner;
12986 return gen_rtx_fmt_e (code, mode, x);
12989 /* Called via for_each_rtx after reload, to clean up truncates of
12990 registers that span multiple actual hard registers. */
12992 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12994 rtx x = *p, reg;
12996 if (GET_CODE (x) != TRUNCATE)
12997 return 0;
12998 reg = XEXP (x, 0);
12999 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
13001 enum machine_mode reg_mode = GET_MODE (reg);
13002 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
13003 subreg_lowpart_offset (DImode, reg_mode));
13004 *(int*) n_changes += 1;
13005 return -1;
13007 return 0;
13010 /* Load and store depend on the highpart of the address. However,
13011 set_attr_alternative does not give well-defined results before reload,
13012 so we must look at the rtl ourselves to see if any of the feeding
13013 registers is used in a memref.
13015 Called by sh_contains_memref_p via for_each_rtx. */
13016 static int
13017 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
13019 return (MEM_P (*loc));
13022 /* Return true iff INSN contains a MEM. */
13023 bool
13024 sh_contains_memref_p (rtx insn)
13026 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
13029 /* Return true iff INSN loads a banked register. */
13030 bool
13031 sh_loads_bankedreg_p (rtx insn)
13033 if (GET_CODE (PATTERN (insn)) == SET)
13035 rtx op = SET_DEST (PATTERN(insn));
13036 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13037 return true;
13040 return false;
13043 /* FNADDR is the MEM expression from a call expander. Return an address
13044 to use in an SHmedia insn pattern. */
13046 rtx shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13048 int is_sym;
13050 fnaddr = XEXP (fnaddr, 0);
13051 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13052 if (flag_pic && is_sym)
13054 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13056 rtx reg = gen_reg_rtx (Pmode);
13058 /* We must not use GOTPLT for sibcalls, because PIC_REG
13059 must be restored before the PLT code gets to run. */
13060 if (is_sibcall)
13061 emit_insn (gen_symGOT2reg (reg, fnaddr));
13062 else
13063 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13064 fnaddr = reg;
13066 else
13068 fnaddr = gen_sym2PIC (fnaddr);
13069 PUT_MODE (fnaddr, Pmode);
13072 /* If ptabs might trap, make this visible to the rest of the compiler.
13073 We generally assume that symbols pertain to valid locations, but
13074 it is possible to generate invalid symbols with asm or linker tricks.
13075 In a list of functions where each returns its successor, an invalid
13076 symbol might denote an empty list. */
13077 if (!TARGET_PT_FIXED
13078 && (!is_sym || TARGET_INVALID_SYMBOLS)
13079 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13081 rtx tr = gen_reg_rtx (PDImode);
13083 emit_insn (gen_ptabs (tr, fnaddr));
13084 fnaddr = tr;
13086 else if (! target_reg_operand (fnaddr, Pmode))
13087 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13088 return fnaddr;
13091 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13092 static reg_class_t
13093 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13095 if (rclass == NO_REGS
13096 && TARGET_SHMEDIA
13097 && (CONST_DOUBLE_P (x)
13098 || GET_CODE (x) == SYMBOL_REF
13099 || PIC_ADDR_P (x)))
13100 return GENERAL_REGS;
13102 return rclass;
13105 /* Implement TARGET_SECONDARY_RELOAD. */
13106 static reg_class_t
13107 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13108 enum machine_mode mode, secondary_reload_info *sri)
13110 enum reg_class rclass = (enum reg_class) rclass_i;
13112 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13113 && REG_P (XEXP (XEXP (x, 0), 0))
13114 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13115 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13117 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13118 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13120 if (REG_P (x) && REGNO (x) == GBR_REG)
13121 return NO_REGS;
13123 if (in_p)
13125 if (REGCLASS_HAS_FP_REG (rclass)
13126 && ! TARGET_SHMEDIA
13127 && immediate_operand ((x), mode)
13128 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13129 && mode == SFmode && fldi_ok ()))
13130 switch (mode)
13132 case SFmode:
13133 sri->icode = CODE_FOR_reload_insf__frn;
13134 return NO_REGS;
13135 case DFmode:
13136 sri->icode = CODE_FOR_reload_indf__frn;
13137 return NO_REGS;
13138 case SImode:
13139 /* ??? If we knew that we were in the appropriate mode -
13140 single precision - we could use a reload pattern directly. */
13141 return FPUL_REGS;
13142 default:
13143 abort ();
13145 if (rclass == FPUL_REGS
13146 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13147 || REGNO (x) == T_REG))
13148 || GET_CODE (x) == PLUS))
13149 return GENERAL_REGS;
13150 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13152 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13153 return GENERAL_REGS;
13154 else if (mode == SFmode)
13155 return FP_REGS;
13156 sri->icode = CODE_FOR_reload_insi__i_fpul;
13157 return NO_REGS;
13159 if (rclass == FPSCR_REGS
13160 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13161 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13162 return GENERAL_REGS;
13163 if (REGCLASS_HAS_FP_REG (rclass)
13164 && TARGET_SHMEDIA
13165 && immediate_operand (x, mode)
13166 && x != CONST0_RTX (GET_MODE (x))
13167 && GET_MODE (x) != V4SFmode)
13168 return GENERAL_REGS;
13169 if ((mode == QImode || mode == HImode)
13170 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13172 sri->icode = ((mode == QImode)
13173 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13174 return NO_REGS;
13176 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13177 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13178 return TARGET_REGS;
13179 } /* end of input-only processing. */
13181 if (((REGCLASS_HAS_FP_REG (rclass)
13182 && (REG_P (x)
13183 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13184 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13185 && TARGET_FMOVD))))
13186 || (REGCLASS_HAS_GENERAL_REG (rclass)
13187 && REG_P (x)
13188 && FP_REGISTER_P (REGNO (x))))
13189 && ! TARGET_SHMEDIA
13190 && (mode == SFmode || mode == SImode))
13191 return FPUL_REGS;
13192 if ((rclass == FPUL_REGS
13193 || (REGCLASS_HAS_FP_REG (rclass)
13194 && ! TARGET_SHMEDIA && mode == SImode))
13195 && (MEM_P (x)
13196 || (REG_P (x)
13197 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13198 || REGNO (x) == T_REG
13199 || system_reg_operand (x, VOIDmode)))))
13201 if (rclass == FPUL_REGS)
13202 return GENERAL_REGS;
13203 return FPUL_REGS;
13205 if ((rclass == TARGET_REGS
13206 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13207 && !satisfies_constraint_Csy (x)
13208 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13209 return GENERAL_REGS;
13210 if ((rclass == MAC_REGS || rclass == PR_REGS)
13211 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13212 && rclass != REGNO_REG_CLASS (REGNO (x)))
13213 return GENERAL_REGS;
13214 if (rclass != GENERAL_REGS && REG_P (x)
13215 && TARGET_REGISTER_P (REGNO (x)))
13216 return GENERAL_REGS;
13218 /* If we get here, fall back to loading the FPUL register through general registers.
13219 This case can happen when movsi_ie insn is picked initially to
13220 load/store the FPUL register from/to another register, and then the
13221 other register is allocated on the stack. */
13222 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13223 return GENERAL_REGS;
13225 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13226 the other operand.
13227 On SH2A we could also just leave it alone here, which would result in a
13228 4 byte move insn being generated instead. However, for this to work
13229 the insns must have the appropriate alternatives. */
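/* For example, 'mov.b @(4,r5),r0' is available as a 2-byte insn, whereas
   other destination registers would have to go through R0 or, on SH2A, use
   the longer 4-byte form mentioned above.  (Illustrative register numbers
   only.)  */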
13230 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13231 && satisfies_constraint_Sdd (x)
13232 && sh_disp_addr_displacement (x)
13233 <= sh_max_mov_insn_displacement (mode, false))
13234 return R0_REGS;
13236 /* When reload is trying to address a QImode or HImode subreg on the stack,
13237 force any subreg byte into R0_REGS, as this is going to become a
13238 displacement address.
13239 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13240 is on the stack, the memref to it might already require a displacement
13241 and that has to be added to the final address. At this point we don't
13242 know the cumulative displacement so we assume the worst case. */
13243 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13244 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13245 return R0_REGS;
13247 return NO_REGS;
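/* Adjust fixed, call-used and register-class information for the selected
   CPU, ABI and PIC settings; this appears to be the SH implementation of
   the TARGET_CONDITIONAL_REGISTER_USAGE hook.  */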
13250 static void
13251 sh_conditional_register_usage (void)
13253 int regno;
13254 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13255 if (! VALID_REGISTER_P (regno))
13256 fixed_regs[regno] = call_used_regs[regno] = 1;
13257 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13258 if (TARGET_SH5)
13260 call_used_regs[FIRST_GENERAL_REG + 8]
13261 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13262 call_really_used_regs[FIRST_GENERAL_REG + 8]
13263 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13265 if (TARGET_SHMEDIA)
13267 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13268 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13269 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13271 if (flag_pic)
13273 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13274 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13276 /* Under the Renesas ABI the MAC registers are preserved across calls. */
13277 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13279 call_really_used_regs[MACH_REG] = 0;
13280 call_really_used_regs[MACL_REG] = 0;
13283 if (TARGET_SHMEDIA)
13285 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13286 if (! fixed_regs[regno] && call_really_used_regs[regno])
13287 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13289 else
13290 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13291 if (! fixed_regs[regno] && call_really_used_regs[regno])
13292 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13295 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13297 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13298 static bool
13299 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13301 return (TARGET_SHMEDIA
13302 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13303 || x == CONST0_RTX (mode)
13304 || !TARGET_SHMEDIA_FPU
13305 || TARGET_SHMEDIA64)
13306 : (GET_CODE (x) != CONST_DOUBLE
13307 || mode == DFmode || mode == SFmode
13308 || mode == DImode || GET_MODE (x) == VOIDmode));
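/* The division strategy currently in effect; presumably selected from the
   -mdiv= option while processing the command line.  */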
13311 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
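/* Register the out-of-line __sync_* libcalls for operations up to
   word size.  */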
13313 static void
13314 sh_init_sync_libfuncs (void)
13316 init_sync_libfuncs (UNITS_PER_WORD);
13319 /* Return true if it is appropriate to emit `ret' instructions in the
13320 body of a function. */
13321 bool
13322 sh_can_use_simple_return_p (void)
13324 HARD_REG_SET live_regs_mask;
13325 int d;
13327 /* Some targets require special return insns. */
13328 if (TARGET_SHMEDIA
13329 || (TARGET_SHCOMPACT
13330 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13331 return false;
13333 if (! reload_completed || frame_pointer_needed)
13334 return false;
13336 /* Moving the prologue around doesn't reduce the size. */
13337 if (optimize_function_for_size_p (cfun))
13338 return false;
13340 /* Finally, allow for the PR register save. */
13341 d = calc_live_regs (&live_regs_mask);
13343 if (rounded_frame_size (d) > 4)
13344 return false;
13346 return true;
13349 /*------------------------------------------------------------------------------
13350 Address mode optimization support code  */
13353 typedef HOST_WIDE_INT disp_t;
13354 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13355 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13356 static const disp_t INVALID_DISP = MAX_DISP;
13358 /* A memory reference which is described by a base register and a
13359 displacement. */
13360 class base_reg_disp
13362 public:
13363 base_reg_disp (rtx br, disp_t d);
13365 bool is_reg (void) const;
13366 bool is_disp (void) const;
13367 rtx reg (void) const;
13368 disp_t disp (void) const;
13370 private:
13371 rtx reg_;
13372 disp_t disp_;
13375 inline
13376 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13377 : reg_ (br), disp_ (d)
13381 inline bool
13382 base_reg_disp::is_reg (void) const
13384 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13387 inline bool
13388 base_reg_disp::is_disp (void) const
13390 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13393 inline rtx
13394 base_reg_disp::reg (void) const
13396 return reg_;
13399 inline disp_t
13400 base_reg_disp::disp (void) const
13402 return disp_;
13405 /* Find the base register and calculate the displacement for a given
13406 address rtx 'x'.
13407 This is done by walking the insn list backwards and following SET insns
13408 that set the value of the specified reg 'x'. */
13409 static base_reg_disp
13410 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13412 if (REG_P (x))
13414 if (REGNO (x) == GBR_REG)
13415 return base_reg_disp (x, disp);
13417 /* We've reached a hard-reg. This is probably the point where
13418 function args are copied to pseudos. Do not go any further and
13419 stick to the pseudo. If the original mem addr was in a hard reg
13420 from the beginning, it will become the base reg. */
13421 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13422 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13424 /* Try to find the previous insn that sets the reg. */
13425 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13426 i = prev_nonnote_insn (i))
13428 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13429 && CALL_P (i))
13430 break;
13432 if (!NONJUMP_INSN_P (i))
13433 continue;
13435 rtx p = PATTERN (i);
13436 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13437 && REGNO (XEXP (p, 0)) == REGNO (x))
13439 /* If the recursion can't find out any more details about the
13440 source of the set, then this reg becomes our new base reg. */
13441 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13445 /* If we get here, no previous insn that sets the reg was found.
13446 The input reg is already the base reg.  */
13447 return base_reg_disp (x, disp);
13450 else if (GET_CODE (x) == PLUS)
13452 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13453 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13455 /* Either left or right val must be a reg.
13456 We don't handle the case of 'reg + reg' here. */
13457 if (left_val.is_reg () && right_val.is_disp ())
13458 return base_reg_disp (left_val.reg (), left_val.disp ()
13459 + right_val.disp () + disp);
13460 else if (right_val.is_reg () && left_val.is_disp ())
13461 return base_reg_disp (right_val.reg (), right_val.disp ()
13462 + left_val.disp () + disp);
13463 else
13464 return base_reg_disp (base_reg, disp);
13467 else if (CONST_INT_P (x))
13468 return base_reg_disp (NULL, disp + INTVAL (x));
13470 /* Didn't find anything useful. */
13471 return base_reg_disp (base_reg, disp);
13474 /* Given an insn and a memory operand, try to find an equivalent GBR
13475 based memory address and return the corresponding new memory address.
13476 Return NULL_RTX if not found. */
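/* For example, if a pseudo was set from 'GBR + 12' and MEM addresses that
   pseudo plus 4, the GBR-relative address @(16,GBR) is returned, provided
   the total displacement passes the gbr_displacement check.  (Displacement
   values here are purely illustrative.)  */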
13478 rtx sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13480 if (!MEM_P (mem))
13481 return NULL_RTX;
13483 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13484 if (side_effects_p (XEXP (mem, 0)))
13485 return NULL_RTX;
13487 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13489 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13491 rtx disp = GEN_INT (gbr_disp.disp ());
13492 if (gbr_displacement (disp, GET_MODE (mem)))
13493 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13496 return NULL_RTX;
13499 /*------------------------------------------------------------------------------
13500 Manual insn combine support code.  */
13503 /* Given a reg rtx and a start insn, try to find the insn that sets the
13504 specified reg by using the specified insn stepping function, such as
13505 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13506 of the reg set. */
13507 set_of_reg
13508 sh_find_set_of_reg (rtx reg, rtx insn, rtx_insn *(*stepfunc)(rtx))
13510 set_of_reg result;
13511 result.insn = insn;
13512 result.set_rtx = NULL_RTX;
13513 result.set_src = NULL_RTX;
13515 if (!REG_P (reg) || insn == NULL_RTX)
13516 return result;
13518 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13519 result.insn = stepfunc (result.insn))
13521 if (BARRIER_P (result.insn))
13522 return result;
13523 if (!NONJUMP_INSN_P (result.insn))
13524 continue;
13525 if (reg_set_p (reg, result.insn))
13527 result.set_rtx = set_of (reg, result.insn);
13529 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13530 return result;
13532 result.set_src = XEXP (result.set_rtx, 1);
13533 return result;
13537 return result;
13540 /* Given an op rtx and an insn, try to find out whether the result of the
13541 specified op consists only of logical operations on T bit stores. */
13542 bool
13543 sh_is_logical_t_store_expr (rtx op, rtx insn)
13545 if (!logical_operator (op, SImode))
13546 return false;
13548 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13549 int op_is_t_count = 0;
13551 for (int i = 0; i < 2; ++i)
13553 if (t_reg_operand (ops[i], VOIDmode)
13554 || negt_reg_operand (ops[i], VOIDmode))
13555 op_is_t_count++;
13557 else
13559 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13560 prev_nonnote_insn_bb);
13561 if (op_set.set_src == NULL_RTX)
13562 continue;
13564 if (t_reg_operand (op_set.set_src, VOIDmode)
13565 || negt_reg_operand (op_set.set_src, VOIDmode)
13566 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13567 op_is_t_count++;
13571 return op_is_t_count == 2;
13574 /* Given the operand that is extended in a sign/zero extend insn, and the
13575 insn, try to figure out whether the sign/zero extension can be replaced
13576 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13577 NULL_RTX otherwise. */
13579 rtx sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13581 if (REG_P (extended_op))
13582 extended_op = extended_op; /* Already a plain reg; use it unchanged.  */
13583 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13584 extended_op = SUBREG_REG (extended_op);
13585 else
13586 return NULL_RTX;
13588 /* Reg moves must be of the same mode. */
13589 if (GET_MODE (extended_op) != SImode)
13590 return NULL_RTX;
13592 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13593 if (s.set_src == NULL_RTX)
13594 return NULL_RTX;
13596 if (t_reg_operand (s.set_src, VOIDmode)
13597 || negt_reg_operand (s.set_src, VOIDmode))
13598 return extended_op;
13600 /* If the zero-extended reg was formed by a logical operation, check the
13601 operands of the logical operation.  If both originated from T bit
13602 stores, the zero extension can be eliminated. */
13603 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13604 return extended_op;
13606 return NULL_RTX;
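/* Emit the insns that switch the FPSCR precision mode for the
   mode-switching pass (presumably the TARGET_MODE_EMIT hook).  On SH4A and
   SH4-300, when the previous mode is known, the PR bit (and SZ with fmovd)
   is simply toggled; otherwise the new FPSCR value is loaded from memory.  */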
13609 static void
13610 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
13611 int prev_mode, HARD_REG_SET regs_live)
13613 if ((TARGET_SH4A_FP || TARGET_SH4_300)
13614 && prev_mode != FP_MODE_NONE && prev_mode != mode)
13616 emit_insn (gen_toggle_pr ());
13617 if (TARGET_FMOVD)
13618 emit_insn (gen_toggle_sz ());
13620 else
13621 fpscr_set_from_mem (mode, regs_live);
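/* Return the FPSCR mode required by INSN (its fp_mode attribute), or
   FP_MODE_NONE for insns that do not care which mode is in effect.  */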
13624 static int
13625 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx insn)
13627 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
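/* Return the FPSCR mode in effect after INSN.  Under the Renesas / Hitachi
   calling convention an insn may switch the mode as a side effect; this is
   recorded in its fp_set attribute.  */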
13630 static int
13631 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx insn)
13633 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
13634 get_attr_fp_set (insn) != FP_SET_NONE)
13635 return (int) get_attr_fp_set (insn);
13636 else
13637 return mode;
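/* Return the FPSCR mode assumed on entry to the current function.  */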
13640 static int
13641 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
13643 return NORMAL_MODE (entity);
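/* Return the FPSCR mode that must be in effect when the current function
   returns.  Functions with the 'renesas' attribute do not need to restore
   any particular mode.  */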
13646 static int
13647 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
13649 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
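/* Return the N-th FPSCR mode in order of preference for the mode-switching
   pass; the FPU's default precision comes first.  */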
13652 static int
13653 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
13655 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
13658 #include "gt-sh.h"