1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2014 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
24 #include <algorithm>
26 #include "config.h"
27 #include "system.h"
28 #include "coretypes.h"
29 #include "tm.h"
30 #include "insn-config.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
35 #include "calls.h"
36 #include "varasm.h"
37 #include "flags.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "output.h"
45 #include "insn-attr.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "dwarf2.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "langhooks.h"
53 #include "basic-block.h"
54 #include "df.h"
55 #include "intl.h"
56 #include "sched-int.h"
57 #include "params.h"
58 #include "ggc.h"
59 #include "pointer-set.h"
60 #include "hash-table.h"
61 #include "tree-ssa-alias.h"
62 #include "internal-fn.h"
63 #include "gimple-fold.h"
64 #include "tree-eh.h"
65 #include "gimple-expr.h"
66 #include "is-a.h"
67 #include "gimple.h"
68 #include "gimplify.h"
69 #include "cfgloop.h"
70 #include "alloc-pool.h"
71 #include "tm-constrs.h"
72 #include "opts.h"
73 #include "tree-pass.h"
74 #include "pass_manager.h"
75 #include "context.h"
76 #include "builtins.h"
78 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
80 /* These are some macros to abstract register modes. */
81 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
82 && ((HOST_WIDE_INT)(VALUE)) <= 511)
84 #define CONST_OK_FOR_ADD(size) \
85 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
86 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
87 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
88 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
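/* Illustrative note (added, not in the original file): these wrappers pick
   the SImode or DImode generator according to the pointer width, so callers
   do not have to test TARGET_SHMEDIA64 themselves.  A hypothetical stack
   adjustment by a small constant could be emitted as

     emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                          GEN_INT (adjust)));

   which resolves to gen_addsi3 on 32-bit targets and gen_adddi3 on
   SHmedia64.  */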
90 /* Used to simplify the logic below. Find the attributes wherever
91 they may be. */
92 #define SH_ATTRIBUTES(decl) \
93 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
94 : DECL_ATTRIBUTES (decl) \
95 ? (DECL_ATTRIBUTES (decl)) \
96 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
98 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
99 int current_function_interrupt;
101 tree sh_deferred_function_attributes;
102 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
104 /* Global variables for machine-dependent things. */
106 /* Which CPU we are scheduling for. */
107 enum processor_type sh_cpu;
109 /* Definitions used in ready queue reordering for first scheduling pass. */
111 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
112 static short *regmode_weight[2];
114 /* Total SFmode and SImode weights of scheduled insns. */
115 static int curr_regmode_pressure[2];
117 /* Number of r0 life regions. */
118 static int r0_life_regions;
120 /* If true, skip cycles for Q -> R movement. */
121 static int skip_cycles = 0;
123 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
124 and returned from sh_reorder2. */
125 static short cached_can_issue_more;
127 /* Unique number for UNSPEC_BBR pattern. */
128 static unsigned int unspec_bbr_uid = 1;
130 /* Provides the class number of the smallest class containing
131 reg number. */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS, GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 int assembler_dialect;
184 static bool shmedia_space_reserved_for_target_registers;
186 static void split_branches (rtx);
187 static int branch_dest (rtx);
188 static void print_slot (rtx);
189 static rtx add_constant (rtx, enum machine_mode, rtx);
190 static void dump_table (rtx, rtx);
191 static bool broken_move (rtx);
192 static bool mova_p (rtx);
193 static rtx find_barrier (int, rtx, rtx);
194 static bool noncall_uses_reg (rtx, rtx, rtx *);
195 static rtx gen_block_redirect (rtx, int, int);
196 static void sh_reorg (void);
197 static void sh_option_override (void);
198 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
199 static rtx frame_insn (rtx);
200 static rtx push (int);
201 static void pop (int);
202 static void push_regs (HARD_REG_SET *, int);
203 static int calc_live_regs (HARD_REG_SET *);
204 static HOST_WIDE_INT rounded_frame_size (int);
205 static bool sh_frame_pointer_required (void);
206 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
207 static int sh_mode_needed (int, rtx);
208 static int sh_mode_after (int, int, rtx);
209 static int sh_mode_entry (int);
210 static int sh_mode_exit (int);
211 static int sh_mode_priority (int entity, int n);
213 static rtx mark_constant_pool_use (rtx);
214 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
215 int, bool *);
216 static tree sh_handle_resbank_handler_attribute (tree *, tree,
217 tree, int, bool *);
218 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
219 tree, int, bool *);
220 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
221 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
222 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
223 static void sh_print_operand (FILE *, rtx, int);
224 static void sh_print_operand_address (FILE *, rtx);
225 static bool sh_print_operand_punct_valid_p (unsigned char code);
226 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
227 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void sh_insert_attributes (tree, tree *);
229 static const char *sh_check_pch_target_flags (int);
230 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
231 static int sh_adjust_cost (rtx, rtx, rtx, int);
232 static int sh_issue_rate (void);
233 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
234 static short find_set_regmode_weight (rtx, enum machine_mode);
235 static short find_insn_regmode_weight (rtx, enum machine_mode);
236 static void find_regmode_weight (basic_block, enum machine_mode);
237 static int find_r0_life_regions (basic_block);
238 static void sh_md_init_global (FILE *, int, int);
239 static void sh_md_finish_global (FILE *, int);
240 static int rank_for_reorder (const void *, const void *);
241 static void swap_reorder (rtx *, int);
242 static void ready_reorder (rtx *, int);
243 static bool high_pressure (enum machine_mode);
244 static int sh_reorder (FILE *, int, rtx *, int *, int);
245 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
246 static void sh_md_init (FILE *, int, int);
247 static int sh_variable_issue (FILE *, int, rtx, int);
249 static bool sh_function_ok_for_sibcall (tree, tree);
251 static bool sh_cannot_modify_jumps_p (void);
252 static reg_class_t sh_target_reg_class (void);
253 static bool sh_optimize_target_register_callee_saved (bool);
254 static bool sh_ms_bitfield_layout_p (const_tree);
256 static void sh_init_builtins (void);
257 static tree sh_builtin_decl (unsigned, bool);
258 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
259 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
260 HOST_WIDE_INT, tree);
261 static void sh_file_start (void);
262 static bool flow_dependent_p (rtx, rtx);
263 static void flow_dependent_p_1 (rtx, const_rtx, void *);
264 static int shiftcosts (rtx);
265 static int and_xor_ior_costs (rtx, int);
266 static int addsubcosts (rtx);
267 static int multcosts (rtx);
268 static bool unspec_caller_rtx_p (rtx);
269 static bool sh_cannot_copy_insn_p (rtx);
270 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
271 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
272 static int sh_pr_n_sets (void);
273 static rtx sh_allocate_initial_value (rtx);
274 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
275 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
276 enum machine_mode,
277 struct secondary_reload_info *);
278 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
279 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
280 static rtx sh_delegitimize_address (rtx);
281 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
282 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
283 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
284 static int scavenge_reg (HARD_REG_SET *s);
285 struct save_schedule_s;
286 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
287 struct save_schedule_s *, int);
289 static rtx sh_struct_value_rtx (tree, int);
290 static rtx sh_function_value (const_tree, const_tree, bool);
291 static bool sh_function_value_regno_p (const unsigned int);
292 static rtx sh_libcall_value (enum machine_mode, const_rtx);
293 static bool sh_return_in_memory (const_tree, const_tree);
294 static rtx sh_builtin_saveregs (void);
295 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
296 tree, int *, int);
297 static bool sh_strict_argument_naming (cumulative_args_t);
298 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
299 static tree sh_build_builtin_va_list (void);
300 static void sh_va_start (tree, rtx);
301 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
302 static bool sh_promote_prototypes (const_tree);
303 static enum machine_mode sh_promote_function_mode (const_tree type,
304 enum machine_mode,
305 int *punsignedp,
306 const_tree funtype,
307 int for_return);
308 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
309 const_tree, bool);
310 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
311 const_tree, bool);
312 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
313 tree, bool);
314 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
315 const_tree, bool);
316 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
317 const_tree, bool);
318 static bool sh_scalar_mode_supported_p (enum machine_mode);
319 static int sh_dwarf_calling_convention (const_tree);
320 static void sh_encode_section_info (tree, rtx, int);
321 static bool sh2a_function_vector_p (tree);
322 static void sh_trampoline_init (rtx, tree, rtx);
323 static rtx sh_trampoline_adjust_address (rtx);
324 static void sh_conditional_register_usage (void);
325 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
326 static int mov_insn_size (enum machine_mode, bool);
327 static int mov_insn_alignment_mask (enum machine_mode, bool);
328 static bool sequence_insn_p (rtx);
329 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
330 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
331 enum machine_mode, bool);
332 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
334 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
336 static const struct attribute_spec sh_attribute_table[] =
338 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
339 affects_type_identity } */
340 { "interrupt_handler", 0, 0, true, false, false,
341 sh_handle_interrupt_handler_attribute, false },
342 { "sp_switch", 1, 1, true, false, false,
343 sh_handle_sp_switch_attribute, false },
344 { "trap_exit", 1, 1, true, false, false,
345 sh_handle_trap_exit_attribute, false },
346 { "renesas", 0, 0, false, true, false,
347 sh_handle_renesas_attribute, false },
348 { "trapa_handler", 0, 0, true, false, false,
349 sh_handle_interrupt_handler_attribute, false },
350 { "nosave_low_regs", 0, 0, true, false, false,
351 sh_handle_interrupt_handler_attribute, false },
352 { "resbank", 0, 0, true, false, false,
353 sh_handle_resbank_handler_attribute, false },
354 { "function_vector", 1, 1, true, false, false,
355 sh2a_handle_function_vector_handler_attribute, false },
356 { NULL, 0, 0, false, false, false, NULL, false }
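/* Usage examples (added for illustration; see the GCC manual for the SH
   attributes handled above).  An SH interrupt handler that runs on an
   alternate stack and returns via "trapa #4" could be declared as

     extern char *alt_stack;   // hypothetical: holds the alternate stack address
     void isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"), trap_exit (4)));

   "renesas" selects the Renesas ABI for a function type, "resbank" makes an
   SH2A interrupt handler save registers via the register banks, and
   "function_vector" (with a vector number) places an SH2A function in the
   TBR-relative function vector.  */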
359 /* Initialize the GCC target structure. */
360 #undef TARGET_ATTRIBUTE_TABLE
361 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
363 /* The next two are used for debug info when compiling with -gdwarf. */
364 #undef TARGET_ASM_UNALIGNED_HI_OP
365 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
366 #undef TARGET_ASM_UNALIGNED_SI_OP
367 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
369 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
370 #undef TARGET_ASM_UNALIGNED_DI_OP
371 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
372 #undef TARGET_ASM_ALIGNED_DI_OP
373 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
375 #undef TARGET_OPTION_OVERRIDE
376 #define TARGET_OPTION_OVERRIDE sh_option_override
378 #undef TARGET_PRINT_OPERAND
379 #define TARGET_PRINT_OPERAND sh_print_operand
380 #undef TARGET_PRINT_OPERAND_ADDRESS
381 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
382 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
383 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
384 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
385 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
387 #undef TARGET_ASM_FUNCTION_EPILOGUE
388 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
390 #undef TARGET_ASM_OUTPUT_MI_THUNK
391 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
393 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
394 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
395 hook_bool_const_tree_hwi_hwi_const_tree_true
397 #undef TARGET_ASM_FILE_START
398 #define TARGET_ASM_FILE_START sh_file_start
399 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
400 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
402 #undef TARGET_REGISTER_MOVE_COST
403 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
405 #undef TARGET_INSERT_ATTRIBUTES
406 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
408 #undef TARGET_SCHED_ADJUST_COST
409 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
411 #undef TARGET_SCHED_ISSUE_RATE
412 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
414 /* The next 5 hooks have been implemented for re-enabling sched1. With the
415 help of these macros we limit the movement of insns in sched1 to
416 reduce the register pressure. The overall idea is to keep count of the SImode
417 and SFmode regs required by already scheduled insns. When these counts
418 cross some threshold values, give priority to insns that free registers.
419 The insn that frees registers is most likely to be the insn with the lowest
420 LUID (original insn order), but such an insn might be sitting in the stalled
421 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
422 up to a maximum of 8, so that such insns may move from Q -> R.
424 The descriptions of the hooks are as follows:
426 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
427 scheduler; it is called inside the sched_init function just after
428 find_insn_reg_weights function call. It is used to calculate the SImode
429 and SFmode weights of the insns of basic blocks, much like what
430 find_insn_reg_weights does.
431 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
433 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
434 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
435 (Q)->(R).
437 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
438 high, reorder the ready queue so that the insn with the lowest LUID will be
439 issued next.
441 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
442 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
444 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
445 can be returned from TARGET_SCHED_REORDER2.
447 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
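/* Rough sketch (added for illustration; PRESSURE_LIMIT is a made-up name,
   the real thresholds live in high_pressure () further down) of how the
   pieces described above cooperate inside sh_reorder / sh_reorder2:

     if (CURR_REGMODE_PRESSURE (SImode) > PRESSURE_LIMIT
         || CURR_REGMODE_PRESSURE (SFmode) > PRESSURE_LIMIT)
       {
         ready_reorder (ready, n_ready);   // sort so the lowest-LUID insn,
                                           // most likely to free a register,
                                           // is issued first
         skip_cycles = 1;                  // ask sh_dfa_new_cycle to advance
                                           // up to 8 cycles so stalled insns
                                           // can move from Q to R
       }
  */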
449 #undef TARGET_SCHED_DFA_NEW_CYCLE
450 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
452 #undef TARGET_SCHED_INIT_GLOBAL
453 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
455 #undef TARGET_SCHED_FINISH_GLOBAL
456 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
458 #undef TARGET_SCHED_VARIABLE_ISSUE
459 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
461 #undef TARGET_SCHED_REORDER
462 #define TARGET_SCHED_REORDER sh_reorder
464 #undef TARGET_SCHED_REORDER2
465 #define TARGET_SCHED_REORDER2 sh_reorder2
467 #undef TARGET_SCHED_INIT
468 #define TARGET_SCHED_INIT sh_md_init
470 #undef TARGET_DELEGITIMIZE_ADDRESS
471 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
473 #undef TARGET_LEGITIMIZE_ADDRESS
474 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
476 #undef TARGET_CANNOT_MODIFY_JUMPS_P
477 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
478 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
479 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
480 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
481 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
482 sh_optimize_target_register_callee_saved
484 #undef TARGET_MS_BITFIELD_LAYOUT_P
485 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
487 #undef TARGET_INIT_BUILTINS
488 #define TARGET_INIT_BUILTINS sh_init_builtins
489 #undef TARGET_BUILTIN_DECL
490 #define TARGET_BUILTIN_DECL sh_builtin_decl
491 #undef TARGET_EXPAND_BUILTIN
492 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
494 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
495 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
497 #undef TARGET_CANNOT_COPY_INSN_P
498 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS sh_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST sh_address_cost
503 #undef TARGET_ALLOCATE_INITIAL_VALUE
504 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
506 #undef TARGET_MACHINE_DEPENDENT_REORG
507 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
509 #undef TARGET_DWARF_REGISTER_SPAN
510 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
512 #ifdef HAVE_AS_TLS
513 #undef TARGET_HAVE_TLS
514 #define TARGET_HAVE_TLS true
515 #endif
517 #undef TARGET_PROMOTE_PROTOTYPES
518 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
519 #undef TARGET_PROMOTE_FUNCTION_MODE
520 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
522 #undef TARGET_FUNCTION_VALUE
523 #define TARGET_FUNCTION_VALUE sh_function_value
524 #undef TARGET_FUNCTION_VALUE_REGNO_P
525 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
526 #undef TARGET_LIBCALL_VALUE
527 #define TARGET_LIBCALL_VALUE sh_libcall_value
528 #undef TARGET_STRUCT_VALUE_RTX
529 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
530 #undef TARGET_RETURN_IN_MEMORY
531 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
533 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
534 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
535 #undef TARGET_SETUP_INCOMING_VARARGS
536 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
537 #undef TARGET_STRICT_ARGUMENT_NAMING
538 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
539 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
540 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
541 #undef TARGET_MUST_PASS_IN_STACK
542 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
545 #undef TARGET_CALLEE_COPIES
546 #define TARGET_CALLEE_COPIES sh_callee_copies
547 #undef TARGET_ARG_PARTIAL_BYTES
548 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
549 #undef TARGET_FUNCTION_ARG
550 #define TARGET_FUNCTION_ARG sh_function_arg
551 #undef TARGET_FUNCTION_ARG_ADVANCE
552 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
554 #undef TARGET_BUILD_BUILTIN_VA_LIST
555 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
556 #undef TARGET_EXPAND_BUILTIN_VA_START
557 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
558 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
559 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
561 #undef TARGET_SCALAR_MODE_SUPPORTED_P
562 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
563 #undef TARGET_VECTOR_MODE_SUPPORTED_P
564 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
566 #undef TARGET_CHECK_PCH_TARGET_FLAGS
567 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
569 #undef TARGET_DWARF_CALLING_CONVENTION
570 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
572 #undef TARGET_FRAME_POINTER_REQUIRED
573 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
575 #undef TARGET_MODE_EMIT
576 #define TARGET_MODE_EMIT sh_emit_mode_set
578 #undef TARGET_MODE_NEEDED
579 #define TARGET_MODE_NEEDED sh_mode_needed
581 #undef TARGET_MODE_AFTER
582 #define TARGET_MODE_AFTER sh_mode_after
584 #undef TARGET_MODE_ENTRY
585 #define TARGET_MODE_ENTRY sh_mode_entry
587 #undef TARGET_MODE_EXIT
588 #define TARGET_MODE_EXIT sh_mode_exit
590 #undef TARGET_MODE_PRIORITY
591 #define TARGET_MODE_PRIORITY sh_mode_priority
593 /* Return regmode weight for insn. */
594 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
595 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
597 /* Return current register pressure for regmode. */
598 #define CURR_REGMODE_PRESSURE(MODE)\
599 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
601 #undef TARGET_ENCODE_SECTION_INFO
602 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
604 #undef TARGET_SECONDARY_RELOAD
605 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
607 #undef TARGET_PREFERRED_RELOAD_CLASS
608 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
610 #undef TARGET_CONDITIONAL_REGISTER_USAGE
611 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
613 #undef TARGET_LEGITIMATE_ADDRESS_P
614 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
616 #undef TARGET_TRAMPOLINE_INIT
617 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
618 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
619 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
621 #undef TARGET_LEGITIMATE_CONSTANT_P
622 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
624 #undef TARGET_CANONICALIZE_COMPARISON
625 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
627 #undef TARGET_FIXED_CONDITION_CODE_REGS
628 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
630 /* Machine-specific symbol_ref flags. */
631 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
633 /* The tas.b instruction sets bit 7 of the byte, i.e. 0x80. This value
634 is used by optabs.c atomic op expansion code as well as in sync.md. */
635 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
636 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
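/* Illustrative note (added): tas.b @Rn sets T = (byte == 0) and then writes
   the byte back with bit 7 (0x80) set.  Advertising 0x80 as the "trueval"
   lets the generic expansion of __atomic_test_and_set compare against 0x80
   instead of 1, e.g. in user code:

     static char lock;
     if (!__atomic_test_and_set (&lock, __ATOMIC_ACQUIRE))
       {
         // lock acquired; the byte now holds 0x80 rather than 1
       }
  */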
638 struct gcc_target targetm = TARGET_INITIALIZER;
641 /* Information on the currently selected atomic model.
642 This is initialized in sh_option_override. */
643 static sh_atomic_model selected_atomic_model_;
645 const sh_atomic_model&
646 selected_atomic_model (void)
648 return selected_atomic_model_;
651 static sh_atomic_model
652 parse_validate_atomic_model_option (const char* str)
654 const char* model_names[sh_atomic_model::num_models];
655 model_names[sh_atomic_model::none] = "none";
656 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
657 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
658 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
659 model_names[sh_atomic_model::soft_imask] = "soft-imask";
661 const char* model_cdef_names[sh_atomic_model::num_models];
662 model_cdef_names[sh_atomic_model::none] = "NONE";
663 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
664 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
665 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
666 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
668 sh_atomic_model ret;
669 ret.type = sh_atomic_model::none;
670 ret.name = model_names[sh_atomic_model::none];
671 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
672 ret.strict = false;
673 ret.tcb_gbr_offset = -1;
675 /* Handle empty string as 'none'. */
676 if (str == NULL || *str == '\0')
677 return ret;
679 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
681 std::vector<std::string> tokens;
682 for (std::stringstream ss (str); ss.good (); )
684 tokens.push_back (std::string ());
685 std::getline (ss, tokens.back (), ',');
688 if (tokens.empty ())
689 err_ret ("invalid atomic model option");
691 /* The first token must be the atomic model name. */
693 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
694 if (tokens.front () == model_names[i])
696 ret.type = (sh_atomic_model::enum_type)i;
697 ret.name = model_names[i];
698 ret.cdef_name = model_cdef_names[i];
699 goto got_mode_name;
702 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
703 got_mode_name:;
706 /* Go through the remaining tokens. */
707 for (size_t i = 1; i < tokens.size (); ++i)
709 if (tokens[i] == "strict")
710 ret.strict = true;
711 else if (tokens[i].find ("gbr-offset=") == 0)
713 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
714 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
715 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
716 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
717 "option", offset_str.c_str ());
719 else
720 err_ret ("unknown parameter \"%s\" in atomic model option",
721 tokens[i].c_str ());
724 /* Check that the selection makes sense. */
725 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
726 err_ret ("atomic operations are not supported on SHmedia");
728 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
729 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
730 ret.name);
732 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
733 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
735 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
736 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
738 if (ret.type == sh_atomic_model::soft_tcb
739 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
740 || (ret.tcb_gbr_offset & 3) != 0))
741 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
742 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
743 ret.name);
745 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
746 err_ret ("cannot use atomic model %s in user mode", ret.name);
748 return ret;
750 #undef err_ret
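/* Examples (added for illustration) of -matomic-model= strings that the
   parser above accepts:

     -matomic-model=soft-gusa                 gUSA software sequences (needs SH3 or later)
     -matomic-model=hard-llcs                 LL/SC style sequences (SH4A only)
     -matomic-model=soft-tcb,gbr-offset=16    thread-control-block field at GBR+16
                                              (offset must be a multiple of 4, 0..1020)
     -matomic-model=soft-imask,strict         interrupt masking, strict variant

   An empty string or "none" disables the atomic models.  */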
753 /* Register SH specific RTL passes. */
754 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
755 const char* name);
756 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
757 const char* name);
758 static void
759 register_sh_passes (void)
761 if (!TARGET_SH1)
762 return;
764 /* Running the sh_treg_combine pass after ce1 generates better code when
765 comparisons are combined and reg-reg moves are introduced, because
766 reg-reg moves will be eliminated afterwards. However, there are quite
767 some cases where combine will be unable to fold comparison related insns,
768 thus for now don't do it.
769 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
770 PASS_POS_INSERT_AFTER, "ce1", 1);  */
773 /* Run sh_treg_combine pass after combine but before register allocation. */
774 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
775 PASS_POS_INSERT_AFTER, "split1", 1);
777 /* Run sh_treg_combine pass after register allocation and basic block
778 reordering as this sometimes creates new opportunities. */
779 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
780 PASS_POS_INSERT_AFTER, "split4", 1);
782 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
783 is known after a conditional branch.
784 This must be done after basic blocks and branch conditions have
785 stabilized and won't be changed by further passes. */
786 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
787 PASS_POS_INSERT_BEFORE, "sched2", 1);
790 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
791 various options, and do some machine dependent initialization. */
792 static void
793 sh_option_override (void)
795 int regno;
797 SUBTARGET_OVERRIDE_OPTIONS;
798 if (optimize > 1 && !optimize_size)
799 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
801 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
802 TARGET_CBRANCHDI4 = 1;
803 TARGET_CMPEQDI_T = 0;
805 sh_cpu = PROCESSOR_SH1;
806 assembler_dialect = 0;
807 if (TARGET_SH2)
808 sh_cpu = PROCESSOR_SH2;
809 if (TARGET_SH2E)
810 sh_cpu = PROCESSOR_SH2E;
811 if (TARGET_SH2A)
812 sh_cpu = PROCESSOR_SH2A;
813 if (TARGET_SH3)
814 sh_cpu = PROCESSOR_SH3;
815 if (TARGET_SH3E)
816 sh_cpu = PROCESSOR_SH3E;
817 if (TARGET_SH4)
819 assembler_dialect = 1;
820 sh_cpu = PROCESSOR_SH4;
822 if (TARGET_SH4A_ARCH)
824 assembler_dialect = 1;
825 sh_cpu = PROCESSOR_SH4A;
827 if (TARGET_SH5)
829 sh_cpu = PROCESSOR_SH5;
830 target_flags |= MASK_ALIGN_DOUBLE;
831 if (TARGET_SHMEDIA_FPU)
832 target_flags |= MASK_FMOVD;
833 if (TARGET_SHMEDIA)
835 /* There are no delay slots on SHmedia. */
836 flag_delayed_branch = 0;
837 /* Relaxation isn't yet supported for SHmedia */
838 target_flags &= ~MASK_RELAX;
839 /* After reload, if conversion does little good but can cause
840 ICEs:
841 - find_if_block doesn't do anything for SH because we don't
842 have conditional execution patterns. (We use conditional
843 move patterns, which are handled differently, and only
844 before reload).
845 - find_cond_trap doesn't do anything for the SH because we
846 don't have conditional traps.
847 - find_if_case_1 uses redirect_edge_and_branch_force in
848 the only path that does an optimization, and this causes
849 an ICE when branch targets are in registers.
850 - find_if_case_2 doesn't do anything for the SHmedia after
851 reload except when it can redirect a tablejump - and
852 that's rather rare. */
853 flag_if_conversion2 = 0;
854 if (! strcmp (sh_div_str, "call"))
855 sh_div_strategy = SH_DIV_CALL;
856 else if (! strcmp (sh_div_str, "call2"))
857 sh_div_strategy = SH_DIV_CALL2;
858 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
859 sh_div_strategy = SH_DIV_FP;
860 else if (! strcmp (sh_div_str, "inv"))
861 sh_div_strategy = SH_DIV_INV;
862 else if (! strcmp (sh_div_str, "inv:minlat"))
863 sh_div_strategy = SH_DIV_INV_MINLAT;
864 else if (! strcmp (sh_div_str, "inv20u"))
865 sh_div_strategy = SH_DIV_INV20U;
866 else if (! strcmp (sh_div_str, "inv20l"))
867 sh_div_strategy = SH_DIV_INV20L;
868 else if (! strcmp (sh_div_str, "inv:call2"))
869 sh_div_strategy = SH_DIV_INV_CALL2;
870 else if (! strcmp (sh_div_str, "inv:call"))
871 sh_div_strategy = SH_DIV_INV_CALL;
872 else if (! strcmp (sh_div_str, "inv:fp"))
874 if (TARGET_FPU_ANY)
875 sh_div_strategy = SH_DIV_INV_FP;
876 else
877 sh_div_strategy = SH_DIV_INV;
879 TARGET_CBRANCHDI4 = 0;
880 /* Assembler CFI isn't yet fully supported for SHmedia. */
881 flag_dwarf2_cfi_asm = 0;
884 else
886 /* Only the sh64-elf assembler fully supports .quad properly. */
887 targetm.asm_out.aligned_op.di = NULL;
888 targetm.asm_out.unaligned_op.di = NULL;
890 if (TARGET_SH1)
892 if (! strcmp (sh_div_str, "call-div1"))
893 sh_div_strategy = SH_DIV_CALL_DIV1;
894 else if (! strcmp (sh_div_str, "call-fp")
895 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
896 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
897 sh_div_strategy = SH_DIV_CALL_FP;
898 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
899 sh_div_strategy = SH_DIV_CALL_TABLE;
900 else
901 /* Pick one that makes most sense for the target in general.
902 It is not much good to use different functions depending
903 on -Os, since then we'll end up with two different functions
904 when some of the code is compiled for size, and some for
905 speed. */
907 /* SH4 tends to emphasize speed. */
908 if (TARGET_HARD_SH4)
909 sh_div_strategy = SH_DIV_CALL_TABLE;
910 /* These have their own way of doing things. */
911 else if (TARGET_SH2A)
912 sh_div_strategy = SH_DIV_INTRINSIC;
913 /* ??? Should we use the integer SHmedia function instead? */
914 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
915 sh_div_strategy = SH_DIV_CALL_FP;
916 /* SH1 .. SH3 cores often go into small-footprint systems, so
917 default to the smallest implementation available. */
918 else
919 sh_div_strategy = SH_DIV_CALL_DIV1;
921 if (!TARGET_SH1)
922 TARGET_PRETEND_CMOVE = 0;
923 if (sh_divsi3_libfunc[0])
924 ; /* User supplied - leave it alone. */
925 else if (TARGET_DIVIDE_CALL_FP)
926 sh_divsi3_libfunc = "__sdivsi3_i4";
927 else if (TARGET_DIVIDE_CALL_TABLE)
928 sh_divsi3_libfunc = "__sdivsi3_i4i";
929 else if (TARGET_SH5)
930 sh_divsi3_libfunc = "__sdivsi3_1";
931 else
932 sh_divsi3_libfunc = "__sdivsi3";
934 if (sh_branch_cost == -1)
936 /* The SH1 does not have delay slots, hence we get a pipeline stall
937 at every branch. The SH4 is superscalar, so the single delay slot
938 is not sufficient to keep both pipelines filled.
939 In any case, set the default branch cost to '2', as it results in
940 slightly overall smaller code and also enables some if conversions
941 that are required for matching special T bit related insns. */
942 sh_branch_cost = 2;
945 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
946 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
947 TARGET_ZDCBRANCH = 1;
949 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
950 if (! VALID_REGISTER_P (regno))
951 sh_register_names[regno][0] = '\0';
953 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
954 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
955 sh_additional_register_names[regno][0] = '\0';
957 if ((flag_pic && ! TARGET_PREFERGOT)
958 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
959 flag_no_function_cse = 1;
961 if (targetm.small_register_classes_for_mode_p (VOIDmode))
963 /* Never run scheduling before reload, since that can
964 break global alloc, and generates slower code anyway due
965 to the pressure on R0. */
966 /* Enable sched1 for SH4 if the user explicitly requests.
967 When sched1 is enabled, the ready queue will be reordered by
968 the target hooks if pressure is high. We cannot do this for
969 PIC, SH3 and lower, as they give spill failures for R0. */
970 if (!TARGET_HARD_SH4 || flag_pic)
971 flag_schedule_insns = 0;
972 /* ??? Current exception handling places basic block boundaries
973 after call_insns. This causes high pressure on R0 and gives
974 spill failures for R0 in reload. See PR 22553 and the thread
975 on gcc-patches
976 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
977 else if (flag_exceptions)
979 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
980 warning (0, "ignoring -fschedule-insns because of exception "
981 "handling bug");
982 flag_schedule_insns = 0;
984 else if (flag_schedule_insns
985 && !global_options_set.x_flag_schedule_insns)
986 flag_schedule_insns = 0;
989 /* Unwind info is not correct around the CFG unless either a frame
990 pointer is present or M_A_O_A is set. Fixing this requires rewriting
991 unwind info generation to be aware of the CFG and propagating states
992 around edges. */
993 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
994 || flag_exceptions || flag_non_call_exceptions)
995 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
997 warning (0, "unwind tables currently require either a frame pointer "
998 "or -maccumulate-outgoing-args for correctness");
999 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1002 /* Unwinding with -freorder-blocks-and-partition does not work on this
1003 architecture, because it requires far jumps to label crossing between
1004 hot/cold sections which are rejected on this architecture. */
1005 if (flag_reorder_blocks_and_partition)
1007 if (flag_exceptions)
1009 inform (input_location,
1010 "-freorder-blocks-and-partition does not work with "
1011 "exceptions on this architecture");
1012 flag_reorder_blocks_and_partition = 0;
1013 flag_reorder_blocks = 1;
1015 else if (flag_unwind_tables)
1017 inform (input_location,
1018 "-freorder-blocks-and-partition does not support unwind "
1019 "info on this architecture");
1020 flag_reorder_blocks_and_partition = 0;
1021 flag_reorder_blocks = 1;
1025 /* Adjust loop, jump and function alignment values (in bytes), if those
1026 were not specified by the user using -falign-loops, -falign-jumps
1027 and -falign-functions options.
1028 32 bit alignment is better for speed, because instructions can be
1029 fetched as a pair from a longword boundary. For size use 16 bit
1030 alignment to get more compact code.
1031 Aligning all jumps increases the code size, even if it might
1032 result in slightly faster code. Thus, it is set to the smallest
1033 alignment possible if not specified by the user. */
1034 if (align_loops == 0)
1036 if (TARGET_SH5)
1037 align_loops = 8;
1038 else
1039 align_loops = optimize_size ? 2 : 4;
1042 if (align_jumps == 0)
1044 if (TARGET_SHMEDIA)
1045 align_jumps = 1 << CACHE_LOG;
1046 else
1047 align_jumps = 2;
1049 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1050 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1052 if (align_functions == 0)
1054 if (TARGET_SHMEDIA)
1055 align_functions = optimize_size
1056 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1057 else
1058 align_functions = optimize_size ? 2 : 4;
1061 /* The linker relaxation code breaks when a function contains
1062 alignments that are larger than that at the start of a
1063 compilation unit. */
1064 if (TARGET_RELAX)
1066 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1068 /* Also take possible .long constants / mova tables into account. */
1069 if (min_align < 4)
1070 min_align = 4;
1071 if (align_functions < min_align)
1072 align_functions = min_align;
1075 if (flag_unsafe_math_optimizations)
1077 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1078 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1079 TARGET_FSCA = 1;
1081 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1082 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1083 TARGET_FSRRA = 1;
1086 /* Allow fsrra insn only if -funsafe-math-optimizations and
1087 -ffinite-math-only are enabled. */
1088 TARGET_FSRRA = TARGET_FSRRA
1089 && flag_unsafe_math_optimizations
1090 && flag_finite_math_only;
1092 /* If the -mieee option was not explicitly set by the user, turn it on
1093 unless -ffinite-math-only was specified. See also PR 33135. */
1094 if (! global_options_set.x_TARGET_IEEE)
1095 TARGET_IEEE = ! flag_finite_math_only;
1097 if (sh_fixed_range_str)
1098 sh_fix_range (sh_fixed_range_str);
1100 /* This target defaults to strict volatile bitfields. */
1101 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1102 flag_strict_volatile_bitfields = 1;
1104 /* Parse atomic model option and make sure it is valid for the current
1105 target CPU. */
1106 selected_atomic_model_
1107 = parse_validate_atomic_model_option (sh_atomic_model_str);
1109 register_sh_passes ();
1112 /* Print the operand address in x to the stream. */
1113 static void
1114 sh_print_operand_address (FILE *stream, rtx x)
1116 switch (GET_CODE (x))
1118 case REG:
1119 case SUBREG:
1120 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1121 break;
1123 case PLUS:
1125 rtx base = XEXP (x, 0);
1126 rtx index = XEXP (x, 1);
1128 switch (GET_CODE (index))
1130 case CONST_INT:
1131 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1132 reg_names[true_regnum (base)]);
1133 break;
1135 case REG:
1136 case SUBREG:
1138 int base_num = true_regnum (base);
1139 int index_num = true_regnum (index);
1141 fprintf (stream, "@(r0,%s)",
1142 reg_names[MAX (base_num, index_num)]);
1143 break;
1146 default:
1147 gcc_unreachable ();
1150 break;
1152 case PRE_DEC:
1153 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1154 break;
1156 case POST_INC:
1157 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1158 break;
1160 default:
1161 x = mark_constant_pool_use (x);
1162 output_addr_const (stream, x);
1163 break;
1167 /* Print operand x (an rtx) in assembler syntax to file stream
1168 according to modifier code.
1170 '.' print a .s if insn needs delay slot
1171 ',' print LOCAL_LABEL_PREFIX
1172 '@' print trap, rte or rts depending upon pragma interruptness
1173 '#' output a nop if there is nothing to put in the delay slot
1174 ''' print likelihood suffix (/u for unlikely).
1175 '>' print branch target if -fverbose-asm
1176 'O' print a constant without the #
1177 'R' print the LSW of a dp value - changes if in little endian
1178 'S' print the MSW of a dp value - changes if in little endian
1179 'T' print the next word of a dp value - same as 'R' in big endian mode.
1180 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1181 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1182 'N' print 'r63' if the operand is (const_int 0).
1183 'd' print a V2SF reg as dN instead of fpN.
1184 'm' print a pair `base,offset' or `base,index', for LD and ST.
1185 'U' Likewise for {LD,ST}{HI,LO}.
1186 'V' print the position of a single bit set.
1187 'W' print the position of a single bit cleared.
1188 't' print a memory address which is a register.
1189 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1190 'o' output an operator. */
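/* Hypothetical template fragments (added for illustration) showing some of
   the modifiers above; operand 0 is assumed to be a 64-bit register pair
   and operand 1 a MEM:

     "mov.l  %R1,%R0"   ! least significant word, endianness-aware
     "mov.l  %S1,%S0"   ! most significant word
     "mov%M1 %1,%0"     ! append .b/.w/.l/.s/.d according to the MEM's mode

   '%.' prints the ".s"/"/s" delay-slot suffix when the slot is filled and
   '%#' emits a nop when it is empty.  */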
1191 static void
1192 sh_print_operand (FILE *stream, rtx x, int code)
1194 int regno;
1195 enum machine_mode mode;
1197 switch (code)
1199 tree trapa_attr;
1201 case '.':
1202 if (final_sequence
1203 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1204 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1205 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1206 break;
1207 case ',':
1208 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1209 break;
1210 case '@':
1211 trapa_attr = lookup_attribute ("trap_exit",
1212 DECL_ATTRIBUTES (current_function_decl));
1213 if (trapa_attr)
1214 fprintf (stream, "trapa #%ld",
1215 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1216 else if (sh_cfun_interrupt_handler_p ())
1218 if (sh_cfun_resbank_handler_p ())
1219 fprintf (stream, "resbank\n");
1220 fprintf (stream, "rte");
1222 else
1223 fprintf (stream, "rts");
1224 break;
1225 case '#':
1226 /* Output a nop if there's nothing in the delay slot. */
1227 if (dbr_sequence_length () == 0)
1228 fprintf (stream, "\n\tnop");
1229 break;
1230 case '\'':
1232 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1234 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1235 fputs ("/u", stream);
1236 break;
1238 case '>':
1239 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1241 fputs ("\t! target: ", stream);
1242 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1244 break;
1245 case 'O':
1246 x = mark_constant_pool_use (x);
1247 output_addr_const (stream, x);
1248 break;
1249 /* N.B.: %R / %S / %T adjust memory addresses by four.
1250 For SHMEDIA, that means they can be used to access the first and
1251 second 32 bit part of a 64 bit (or larger) value that
1252 might be held in floating point registers or memory.
1253 While they can be used to access 64 bit parts of a larger value
1254 held in general purpose registers, that won't work with memory -
1255 neither for fp registers, since the frxx names are used. */
1256 case 'R':
1257 if (REG_P (x) || GET_CODE (x) == SUBREG)
1259 regno = true_regnum (x);
1260 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1261 fputs (reg_names[regno], (stream));
1263 else if (MEM_P (x))
1265 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1266 sh_print_operand_address (stream, XEXP (x, 0));
1268 else
1270 rtx sub = NULL_RTX;
1272 mode = GET_MODE (x);
1273 if (mode == VOIDmode)
1274 mode = DImode;
1275 if (GET_MODE_SIZE (mode) >= 8)
1276 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1277 if (sub)
1278 sh_print_operand (stream, sub, 0);
1279 else
1280 output_operand_lossage ("invalid operand to %%R");
1282 break;
1283 case 'S':
1284 if (REG_P (x) || GET_CODE (x) == SUBREG)
1286 regno = true_regnum (x);
1287 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1288 fputs (reg_names[regno], (stream));
1290 else if (MEM_P (x))
1292 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1293 sh_print_operand_address (stream, XEXP (x, 0));
1295 else
1297 rtx sub = NULL_RTX;
1299 mode = GET_MODE (x);
1300 if (mode == VOIDmode)
1301 mode = DImode;
1302 if (GET_MODE_SIZE (mode) >= 8)
1303 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1304 if (sub)
1305 sh_print_operand (stream, sub, 0);
1306 else
1307 output_operand_lossage ("invalid operand to %%S");
1309 break;
1310 case 'T':
1311 /* Next word of a double. */
1312 switch (GET_CODE (x))
1314 case REG:
1315 fputs (reg_names[REGNO (x) + 1], (stream));
1316 break;
1317 case MEM:
1318 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1319 && GET_CODE (XEXP (x, 0)) != POST_INC)
1320 x = adjust_address (x, SImode, 4);
1321 sh_print_operand_address (stream, XEXP (x, 0));
1322 break;
1323 default:
1324 break;
1326 break;
1328 case 't':
1329 gcc_assert (MEM_P (x));
1330 x = XEXP (x, 0);
1331 switch (GET_CODE (x))
1333 case REG:
1334 case SUBREG:
1335 sh_print_operand (stream, x, 0);
1336 break;
1337 default:
1338 break;
1340 break;
1342 case 'o':
1343 switch (GET_CODE (x))
1345 case PLUS: fputs ("add", stream); break;
1346 case MINUS: fputs ("sub", stream); break;
1347 case MULT: fputs ("mul", stream); break;
1348 case DIV: fputs ("div", stream); break;
1349 case EQ: fputs ("eq", stream); break;
1350 case NE: fputs ("ne", stream); break;
1351 case GT: case LT: fputs ("gt", stream); break;
1352 case GE: case LE: fputs ("ge", stream); break;
1353 case GTU: case LTU: fputs ("gtu", stream); break;
1354 case GEU: case LEU: fputs ("geu", stream); break;
1355 default:
1356 break;
1358 break;
1359 case 'M':
1360 if (TARGET_SHMEDIA)
1362 if (MEM_P (x)
1363 && GET_CODE (XEXP (x, 0)) == PLUS
1364 && (REG_P (XEXP (XEXP (x, 0), 1))
1365 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1366 fputc ('x', stream);
1368 else
1370 if (MEM_P (x))
1372 switch (GET_MODE (x))
1374 case QImode: fputs (".b", stream); break;
1375 case HImode: fputs (".w", stream); break;
1376 case SImode: fputs (".l", stream); break;
1377 case SFmode: fputs (".s", stream); break;
1378 case DFmode: fputs (".d", stream); break;
1379 default: gcc_unreachable ();
1383 break;
1385 case 'm':
1386 gcc_assert (MEM_P (x));
1387 x = XEXP (x, 0);
1388 /* Fall through. */
1389 case 'U':
1390 switch (GET_CODE (x))
1392 case REG:
1393 case SUBREG:
1394 sh_print_operand (stream, x, 0);
1395 fputs (", 0", stream);
1396 break;
1398 case PLUS:
1399 sh_print_operand (stream, XEXP (x, 0), 0);
1400 fputs (", ", stream);
1401 sh_print_operand (stream, XEXP (x, 1), 0);
1402 break;
1404 default:
1405 gcc_unreachable ();
1407 break;
1409 case 'V':
1411 int num = exact_log2 (INTVAL (x));
1412 gcc_assert (num >= 0);
1413 fprintf (stream, "#%d", num);
1415 break;
1417 case 'W':
1419 int num = exact_log2 (~INTVAL (x));
1420 gcc_assert (num >= 0);
1421 fprintf (stream, "#%d", num);
1423 break;
1425 case 'd':
1426 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1428 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1429 break;
1431 case 'N':
1432 if (x == CONST0_RTX (GET_MODE (x)))
1434 fprintf ((stream), "r63");
1435 break;
1437 goto default_output;
1438 case 'u':
1439 if (CONST_INT_P (x))
1441 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1442 break;
1444 /* Fall through. */
1446 default_output:
1447 default:
1448 regno = 0;
1449 mode = GET_MODE (x);
1451 switch (GET_CODE (x))
1453 case TRUNCATE:
1455 rtx inner = XEXP (x, 0);
1456 int offset = 0;
1457 enum machine_mode inner_mode;
1459 /* We might see SUBREGs with vector mode registers inside. */
1460 if (GET_CODE (inner) == SUBREG
1461 && (GET_MODE_SIZE (GET_MODE (inner))
1462 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1463 && subreg_lowpart_p (inner))
1464 inner = SUBREG_REG (inner);
1465 if (CONST_INT_P (inner))
1467 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1468 goto default_output;
1470 inner_mode = GET_MODE (inner);
1471 if (GET_CODE (inner) == SUBREG
1472 && (GET_MODE_SIZE (GET_MODE (inner))
1473 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1474 && REG_P (SUBREG_REG (inner)))
1476 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1477 GET_MODE (SUBREG_REG (inner)),
1478 SUBREG_BYTE (inner),
1479 GET_MODE (inner));
1480 inner = SUBREG_REG (inner);
1482 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1483 abort ();
1484 /* Floating point register pairs are always big endian;
1485 general purpose registers are 64 bit wide. */
1486 regno = REGNO (inner);
1487 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1488 - HARD_REGNO_NREGS (regno, mode))
1489 + offset;
1490 x = inner;
1491 goto reg;
1493 case SIGN_EXTEND:
1494 x = XEXP (x, 0);
1495 goto reg;
1496 /* FIXME: We need this on SHmedia32 because reload generates
1497 some sign-extended HI or QI loads into DImode registers
1498 but, because Pmode is SImode, the address ends up with a
1499 subreg:SI of the DImode register. Maybe reload should be
1500 fixed so as to apply alter_subreg to such loads? */
1501 case IF_THEN_ELSE:
1502 gcc_assert (trapping_target_operand (x, VOIDmode));
1503 x = XEXP (XEXP (x, 2), 0);
1504 goto default_output;
1505 case SUBREG:
1506 gcc_assert (SUBREG_BYTE (x) == 0
1507 && REG_P (SUBREG_REG (x)));
1509 x = SUBREG_REG (x);
1510 /* Fall through. */
1512 reg:
1513 case REG:
1514 regno += REGNO (x);
1515 if (FP_REGISTER_P (regno)
1516 && mode == V16SFmode)
1517 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1518 else if (FP_REGISTER_P (REGNO (x))
1519 && mode == V4SFmode)
1520 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1521 else if (REG_P (x)
1522 && mode == V2SFmode)
1523 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1524 else if (FP_REGISTER_P (REGNO (x))
1525 && GET_MODE_SIZE (mode) > 4)
1526 fprintf ((stream), "d%s", reg_names[regno] + 1);
1527 else
1528 fputs (reg_names[regno], (stream));
1529 break;
1531 case MEM:
1532 output_address (XEXP (x, 0));
1533 break;
1535 default:
1536 if (TARGET_SH1)
1537 fputc ('#', stream);
1538 output_addr_const (stream, x);
1539 break;
1541 break;
1545 static bool
1546 sh_print_operand_punct_valid_p (unsigned char code)
1548 return (code == '.' || code == '#' || code == '@' || code == ','
1549 || code == '$' || code == '\'' || code == '>');
1552 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1553 static bool
1554 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1556 if (GET_CODE (x) == UNSPEC)
1558 switch (XINT (x, 1))
1560 case UNSPEC_DATALABEL:
1561 fputs ("datalabel ", file);
1562 output_addr_const (file, XVECEXP (x, 0, 0));
1563 break;
1564 case UNSPEC_PIC:
1565 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1566 output_addr_const (file, XVECEXP (x, 0, 0));
1567 break;
1568 case UNSPEC_GOT:
1569 output_addr_const (file, XVECEXP (x, 0, 0));
1570 fputs ("@GOT", file);
1571 break;
1572 case UNSPEC_GOTOFF:
1573 output_addr_const (file, XVECEXP (x, 0, 0));
1574 fputs ("@GOTOFF", file);
1575 break;
1576 case UNSPEC_PLT:
1577 output_addr_const (file, XVECEXP (x, 0, 0));
1578 fputs ("@PLT", file);
1579 break;
1580 case UNSPEC_GOTPLT:
1581 output_addr_const (file, XVECEXP (x, 0, 0));
1582 fputs ("@GOTPLT", file);
1583 break;
1584 case UNSPEC_DTPOFF:
1585 output_addr_const (file, XVECEXP (x, 0, 0));
1586 fputs ("@DTPOFF", file);
1587 break;
1588 case UNSPEC_GOTTPOFF:
1589 output_addr_const (file, XVECEXP (x, 0, 0));
1590 fputs ("@GOTTPOFF", file);
1591 break;
1592 case UNSPEC_TPOFF:
1593 output_addr_const (file, XVECEXP (x, 0, 0));
1594 fputs ("@TPOFF", file);
1595 break;
1596 case UNSPEC_CALLER:
1598 char name[32];
1599 /* LPCS stands for Label for PIC Call Site. */
1600 targetm.asm_out.generate_internal_label (name, "LPCS",
1601 INTVAL (XVECEXP (x, 0, 0)));
1602 assemble_name (file, name);
1604 break;
1605 case UNSPEC_EXTRACT_S16:
1606 case UNSPEC_EXTRACT_U16:
1608 rtx val, shift;
1610 val = XVECEXP (x, 0, 0);
1611 shift = XVECEXP (x, 0, 1);
1612 fputc ('(', file);
1613 if (shift != const0_rtx)
1614 fputc ('(', file);
1615 if (GET_CODE (val) == CONST
1616 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1618 fputc ('(', file);
1619 output_addr_const (file, val);
1620 fputc (')', file);
1622 else
1623 output_addr_const (file, val);
1624 if (shift != const0_rtx)
1626 fputs (" >> ", file);
1627 output_addr_const (file, shift);
1628 fputc (')', file);
1630 fputs (" & 65535)", file);
1632 break;
1633 case UNSPEC_SYMOFF:
1634 output_addr_const (file, XVECEXP (x, 0, 0));
1635 fputc ('-', file);
1636 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1638 fputc ('(', file);
1639 output_addr_const (file, XVECEXP (x, 0, 1));
1640 fputc (')', file);
1642 else
1643 output_addr_const (file, XVECEXP (x, 0, 1));
1644 break;
1645 case UNSPEC_PCREL_SYMOFF:
1646 output_addr_const (file, XVECEXP (x, 0, 0));
1647 fputs ("-(", file);
1648 output_addr_const (file, XVECEXP (x, 0, 1));
1649 fputs ("-.)", file);
1650 break;
1651 default:
1652 return false;
1654 return true;
1656 else
1657 return false;
1660 /* Encode symbol attributes of a SYMBOL_REF into its
1661 SYMBOL_REF_FLAGS. */
1662 static void
1663 sh_encode_section_info (tree decl, rtx rtl, int first)
1665 default_encode_section_info (decl, rtl, first);
1667 if (TREE_CODE (decl) == FUNCTION_DECL
1668 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1669 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1672 /* Prepare operands for a move define_expand; specifically, one of the
1673 operands must be in a register. */
1674 void
1675 prepare_move_operands (rtx operands[], enum machine_mode mode)
1677 if ((mode == SImode || mode == DImode)
1678 && flag_pic
1679 && ! ((mode == Pmode || mode == ptr_mode)
1680 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1682 rtx temp;
1683 if (SYMBOLIC_CONST_P (operands[1]))
1685 if (MEM_P (operands[0]))
1686 operands[1] = force_reg (Pmode, operands[1]);
1687 else if (TARGET_SHMEDIA
1688 && GET_CODE (operands[1]) == LABEL_REF
1689 && target_reg_operand (operands[0], mode))
1690 /* It's ok. */;
1691 else
1693 temp = (!can_create_pseudo_p ()
1694 ? operands[0]
1695 : gen_reg_rtx (Pmode));
1696 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1699 else if (GET_CODE (operands[1]) == CONST
1700 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1701 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1703 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1704 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1705 mode, temp);
1706 operands[1] = expand_binop (mode, add_optab, temp,
1707 XEXP (XEXP (operands[1], 0), 1),
1708 (!can_create_pseudo_p ()
1709 ? temp
1710 : gen_reg_rtx (Pmode)),
1711 0, OPTAB_LIB_WIDEN);
1715 if (! reload_in_progress && ! reload_completed)
1717 /* Copy the source to a register if neither operand is a register. */
1718 if (! register_operand (operands[0], mode)
1719 && ! sh_register_operand (operands[1], mode))
1720 operands[1] = copy_to_mode_reg (mode, operands[1]);
1722 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1724 /* This is like change_address_1 (operands[0], mode, 0, 1),
1725 except that we can't use that function because it is static. */
1726 rtx new_rtx = change_address (operands[0], mode, 0);
1727 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1728 operands[0] = new_rtx;
1731 /* This case can happen while generating code to move the result
1732 of a library call to the target. Reject `st r0,@(rX,rY)' because
1733 reload will fail to find a spill register for rX, since r0 is already
1734 being used for the source. */
1735 else if (TARGET_SH1
1736 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1737 && MEM_P (operands[0])
1738 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1739 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1740 operands[1] = copy_to_mode_reg (mode, operands[1]);
1743 if (mode == Pmode || mode == ptr_mode)
1745 rtx op0, op1, opc;
1746 enum tls_model tls_kind;
1748 op0 = operands[0];
1749 op1 = operands[1];
1750 if (GET_CODE (op1) == CONST
1751 && GET_CODE (XEXP (op1, 0)) == PLUS
1752 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1753 != TLS_MODEL_NONE))
1755 opc = XEXP (XEXP (op1, 0), 1);
1756 op1 = XEXP (XEXP (op1, 0), 0);
1758 else
1759 opc = NULL_RTX;
1761 if (! reload_in_progress && ! reload_completed
1762 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1764 rtx tga_op1, tga_ret, tmp, tmp2;
1766 if (! flag_pic
1767 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1768 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1769 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1771 /* Don't schedule insns for getting the GOT address when
1772 the first scheduling pass is enabled, to avoid spill
1773 failures for R0. */
1774 if (flag_schedule_insns)
1775 emit_insn (gen_blockage ());
1776 emit_insn (gen_GOTaddr2picreg ());
1777 emit_use (gen_rtx_REG (SImode, PIC_REG));
1778 if (flag_schedule_insns)
1779 emit_insn (gen_blockage ());
1782 switch (tls_kind)
1784 case TLS_MODEL_GLOBAL_DYNAMIC:
1785 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1786 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1787 tmp = gen_reg_rtx (Pmode);
1788 emit_move_insn (tmp, tga_ret);
1789 op1 = tmp;
1790 break;
1792 case TLS_MODEL_LOCAL_DYNAMIC:
1793 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1794 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1796 tmp = gen_reg_rtx (Pmode);
1797 emit_move_insn (tmp, tga_ret);
1799 if (register_operand (op0, Pmode))
1800 tmp2 = op0;
1801 else
1802 tmp2 = gen_reg_rtx (Pmode);
1804 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1805 op1 = tmp2;
1806 break;
1808 case TLS_MODEL_INITIAL_EXEC:
1809 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1810 tmp = gen_sym2GOTTPOFF (op1);
1811 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1812 op1 = tga_op1;
1813 break;
1815 case TLS_MODEL_LOCAL_EXEC:
1816 tmp2 = gen_reg_rtx (Pmode);
1817 emit_insn (gen_store_gbr (tmp2));
1818 tmp = gen_reg_rtx (Pmode);
1819 emit_insn (gen_symTPOFF2reg (tmp, op1));
1821 if (register_operand (op0, Pmode))
1822 op1 = op0;
1823 else
1824 op1 = gen_reg_rtx (Pmode);
1826 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1827 break;
1829 default:
1830 gcc_unreachable ();
1832 if (opc)
1833 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1834 operands[1] = op1;
1839 /* Implement the canonicalize_comparison target hook for the combine
1840 pass. For the target hook this function is invoked via
1841 sh_canonicalize_comparison. This function is also re-used to
1842 canonicalize comparisons in cbranch pattern expanders. */
1843 static void
1844 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1845 enum machine_mode mode,
1846 bool op0_preserve_value)
1848 /* When invoked from within the combine pass the mode is not specified,
1849 so try to get it from one of the operands. */
1850 if (mode == VOIDmode)
1851 mode = GET_MODE (op0);
1852 if (mode == VOIDmode)
1853 mode = GET_MODE (op1);
1855 // We need to have a mode to do something useful here.
1856 if (mode == VOIDmode)
1857 return;
1859 // Currently, we don't deal with floats here.
1860 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1861 return;
1863 // Make sure that the constant operand is the second operand.
1864 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1866 if (op0_preserve_value)
1867 return;
1869 std::swap (op0, op1);
1870 cmp = swap_condition (cmp);
1873 if (CONST_INT_P (op1))
1875 /* Try to adjust the constant operand in such a way that available
1876 comparison insns can be utilized better and the constant can be
1877 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1878 constant pool. */
1879 const HOST_WIDE_INT val = INTVAL (op1);
1881 /* x > -1 --> x >= 0
1882 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1883 x <= -1 --> x < 0
1884 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1885 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1887 cmp = cmp == GT ? GE : LT;
1888 op1 = gen_int_mode (val + 1, mode);
1891 /* x >= 1 --> x > 0
1892 x >= 0x80 --> x > 0x7F
1893 x < 1 --> x <= 0
1894 x < 0x80 --> x <= 0x7F */
1895 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1897 cmp = cmp == GE ? GT : LE;
1898 op1 = gen_int_mode (val - 1, mode);
1901 /* unsigned x >= 1 --> x != 0
1902 unsigned x < 1 --> x == 0 */
1903 else if (val == 1 && (cmp == GEU || cmp == LTU))
1905 cmp = cmp == GEU ? NE : EQ;
1906 op1 = CONST0_RTX (mode);
1909 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1910 unsigned x < 0x80 --> unsigned x < 0x7F */
1911 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1913 cmp = cmp == GEU ? GTU : LEU;
1914 op1 = gen_int_mode (val - 1, mode);
1917 /* unsigned x > 0 --> x != 0
1918 unsigned x <= 0 --> x == 0 */
1919 else if (val == 0 && (cmp == GTU || cmp == LEU))
1920 cmp = cmp == GTU ? NE : EQ;
1922 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1923 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1924 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1925 && val == 0x7FFFFFFF)
1927 cmp = cmp == GTU ? LT : GE;
1928 op1 = const0_rtx;
1931 /* unsigned x >= 0x80000000 --> signed x < 0
1932 unsigned x < 0x80000000 --> signed x >= 0 */
1933 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1934 && (unsigned HOST_WIDE_INT)val
1935 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1937 cmp = cmp == GEU ? LT : GE;
1938 op1 = const0_rtx;
1943 /* This function implements the canonicalize_comparison target hook.
1944 This wrapper around the internally used sh_canonicalize_comparison
1945 function is needed to do the enum rtx_code <-> int conversion.
1946 Target hooks cannot use enum rtx_code in their definitions. */
1947 static void
1948 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1949 bool op0_preserve_value)
1951 enum rtx_code tmp_code = (enum rtx_code)*code;
1952 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1953 VOIDmode, op0_preserve_value);
1954 *code = (int)tmp_code;
1957 bool
1958 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1960 *p1 = T_REG;
1961 *p2 = INVALID_REGNUM;
1962 return true;
1965 enum rtx_code
1966 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1967 enum rtx_code comparison)
1969 /* The scratch reg is only available when this is invoked from within
1970 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1971 rtx scratch = NULL_RTX;
1973 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1974 comparison = GET_CODE (operands[0]);
1975 else
1976 scratch = operands[4];
1978 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1979 mode, false);
1981 /* Notice that this function is also invoked after reload by
1982 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1983 rtx op1 = operands[1];
1985 if (can_create_pseudo_p ())
1986 operands[1] = force_reg (mode, op1);
1987 /* When we are handling DImode comparisons, we want to keep constants so
1988 that we can optimize the component comparisons; however, memory loads
1989 are better issued as a whole so that they can be scheduled well.
1990 SImode equality comparisons allow I08 constants, but only when they
1991 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1992 into a register, that register might as well be r0, and we allow the
1993 constant. If it is already in a register, this is likely to be
1994 allocated to a different hard register, thus we load the constant into
1995 a register unless it is zero. */
1996 if (!REG_P (operands[2])
1997 && (!CONST_INT_P (operands[2])
1998 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1999 && ((comparison != EQ && comparison != NE)
2000 || (REG_P (op1) && REGNO (op1) != R0_REG)
2001 || !satisfies_constraint_I08 (operands[2])))))
2003 if (scratch && GET_MODE (scratch) == mode)
2005 emit_move_insn (scratch, operands[2]);
2006 operands[2] = scratch;
2008 else if (can_create_pseudo_p ())
2009 operands[2] = force_reg (mode, operands[2]);
2011 return comparison;
2014 void
2015 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2017 rtx (*branch_expander) (rtx) = gen_branch_true;
2018 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2019 switch (comparison)
2021 case NE: case LT: case LE: case LTU: case LEU:
2022 comparison = reverse_condition (comparison);
2023 branch_expander = gen_branch_false;
2024 default: ;
2026 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2027 gen_rtx_fmt_ee (comparison, SImode,
2028 operands[1], operands[2])));
2029 rtx jump = emit_jump_insn (branch_expander (operands[3]));
2030 if (probability >= 0)
2031 add_int_reg_note (jump, REG_BR_PROB, probability);
2034 /* ??? How should we distribute probabilities when more than one branch
2035 is generated? So far we only have some ad-hoc observations:
2036 - If the operands are random, they are likely to differ in both parts.
2037 - If comparing items in a hash chain, the operands are random or equal;
2038 operation should be EQ or NE.
2039 - If items are searched in an ordered tree from the root, we can expect
2040 the highpart to be unequal about half of the time; operation should be
2041 an inequality comparison, operands non-constant, and overall probability
2042 about 50%. Likewise for quicksort.
2043 - Range checks will often be made against constants. Even if we assume for
2044 simplicity an even distribution of the non-constant operand over a
2045 sub-range here, the same probability could be generated with differently
2046 wide sub-ranges - as long as the ratio of the part of the subrange that
2047 is before the threshold to the part that comes after the threshold stays
2048 the same. Thus, we can't really tell anything here;
2049 assuming random distribution is at least simple.
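/* An illustrative sketch (not from the original sources): for a signed
   DImode comparison "op1 > op2" branching to label L, the splitter below
   typically ends up emitting
       if (high(op1) >  high(op2))   goto L;      -- msw_taken = GT
       if (high(op1) <  high(op2))   goto skip;   -- msw_skip  = LT
       if (low(op1)  >u low(op2))    goto L;      -- lsw_taken = GTU
     skip:
   The exact insns and the branch probabilities attached to them depend on
   the operands and on the heuristics discussed above.  */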
2051 bool
2052 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2054 enum rtx_code msw_taken, msw_skip, lsw_taken;
2055 rtx skip_label = NULL_RTX;
2056 rtx op1h, op1l, op2h, op2l;
2057 int num_branches;
2058 int prob, rev_prob;
2059 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2060 rtx scratch = operands[4];
2062 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2063 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2064 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2065 op1l = gen_lowpart (SImode, operands[1]);
2066 op2l = gen_lowpart (SImode, operands[2]);
2067 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2068 prob = split_branch_probability;
2069 rev_prob = REG_BR_PROB_BASE - prob;
2070 switch (comparison)
2072 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2073 That costs 1 cycle more when the first branch can be predicted taken,
2074 but saves us mispredicts because only one branch needs prediction.
2075 It also enables generating the cmpeqdi_t-1 pattern. */
2076 case EQ:
2077 if (TARGET_CMPEQDI_T)
2079 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2080 emit_jump_insn (gen_branch_true (operands[3]));
2081 return true;
2083 msw_skip = NE;
2084 lsw_taken = EQ;
2085 if (prob >= 0)
2087 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
2088 msw_skip_prob = rev_prob;
2089 if (REG_BR_PROB_BASE <= 65535)
2090 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2091 else
2093 lsw_taken_prob
2094 = (prob
2095 ? (REG_BR_PROB_BASE
2096 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2097 / ((gcov_type) prob << 32)))
2098 : 0);
2101 break;
2102 case NE:
2103 if (TARGET_CMPEQDI_T)
2105 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2106 emit_jump_insn (gen_branch_false (operands[3]));
2107 return true;
2109 msw_taken = NE;
2110 msw_taken_prob = prob;
2111 lsw_taken = NE;
2112 lsw_taken_prob = 0;
2113 break;
2114 case GTU: case GT:
2115 msw_taken = comparison;
2116 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2117 break;
2118 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2119 msw_skip = swap_condition (msw_taken);
2120 lsw_taken = GTU;
2121 break;
2122 case GEU: case GE:
2123 if (op2l == CONST0_RTX (SImode))
2124 msw_taken = comparison;
2125 else
2127 msw_taken = comparison == GE ? GT : GTU;
2128 msw_skip = swap_condition (msw_taken);
2129 lsw_taken = GEU;
2131 break;
2132 case LTU: case LT:
2133 msw_taken = comparison;
2134 if (op2l == CONST0_RTX (SImode))
2135 break;
2136 msw_skip = swap_condition (msw_taken);
2137 lsw_taken = LTU;
2138 break;
2139 case LEU: case LE:
2140 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2141 msw_taken = comparison;
2142 else
2144 lsw_taken = LEU;
2145 if (comparison == LE)
2146 msw_taken = LT;
2147 else if (op2h != CONST0_RTX (SImode))
2148 msw_taken = LTU;
2149 else
2151 msw_skip = swap_condition (LTU);
2152 break;
2154 msw_skip = swap_condition (msw_taken);
2156 break;
2157 default: return false;
2159 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2160 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2161 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2162 if (comparison != EQ && comparison != NE && num_branches > 1)
2164 if (!CONSTANT_P (operands[2])
2165 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2166 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2168 msw_taken_prob = prob / 2U;
2169 msw_skip_prob
2170 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2171 lsw_taken_prob = prob;
2173 else
2175 msw_taken_prob = prob;
2176 msw_skip_prob = REG_BR_PROB_BASE;
2177 /* ??? If we have a constant op2h, should we use that when
2178 calculating lsw_taken_prob? */
2179 lsw_taken_prob = prob;
2182 operands[1] = op1h;
2183 operands[2] = op2h;
2184 operands[4] = NULL_RTX;
2185 if (reload_completed
2186 && ! arith_reg_or_0_operand (op2h, SImode)
2187 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2188 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2189 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2191 emit_move_insn (scratch, operands[2]);
2192 operands[2] = scratch;
2194 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2195 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2196 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2198 rtx taken_label = operands[3];
2200 /* Operands were possibly modified, but msw_skip doesn't expect this.
2201 Always use the original ones. */
2202 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2204 operands[1] = op1h;
2205 operands[2] = op2h;
2206 if (reload_completed
2207 && ! arith_reg_or_0_operand (op2h, SImode)
2208 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2210 emit_move_insn (scratch, operands[2]);
2211 operands[2] = scratch;
2215 operands[3] = skip_label = gen_label_rtx ();
2216 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2217 operands[3] = taken_label;
2219 operands[1] = op1l;
2220 operands[2] = op2l;
2221 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2223 if (reload_completed
2224 && ! arith_reg_or_0_operand (op2l, SImode)
2225 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2227 emit_move_insn (scratch, operands[2]);
2228 operands[2] = scratch;
2230 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2232 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2233 emit_label (skip_label);
2234 return true;
2237 /* Given an operand, return 1 if the evaluated operand plugged into an
2238 if_then_else will result in a branch_true, 0 if branch_false, or
2239 -1 if neither applies. The truth table goes like this:
2241 op | cmpval | code | result
2242 ---------+--------+---------+--------------------
2243 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2244 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2245 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2246 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2247 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2248 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2249 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2250 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2252 sh_eval_treg_value (rtx op)
2254 if (t_reg_operand (op, GET_MODE (op)))
2255 return 1;
2256 if (negt_reg_operand (op, GET_MODE (op)))
2257 return 0;
2259 rtx_code code = GET_CODE (op);
2260 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2261 return -1;
2263 int cmpop = code == EQ ? 1 : 0;
2264 int cmpval = INTVAL (XEXP (op, 1));
2265 if (cmpval != 0 && cmpval != 1)
2266 return -1;
2268 int t;
2269 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2270 t = 0;
2271 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2272 t = 1;
2273 else
2274 return -1;
2276 return t ^ (cmpval == cmpop);
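/* A worked example of the truth table above: for op = (eq (reg T) (const_int 0))
   we get t = 0, cmpval = 0 and cmpop = 1 (EQ), so the result is
   0 ^ (0 == 1) = 0, i.e. the expression behaves like a branch_false.  */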
2279 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2281 static void
2282 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2284 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2286 insn = gen_rtx_PARALLEL (VOIDmode,
2287 gen_rtvec (2, insn,
2288 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2289 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2291 else
2292 emit_insn (insn);
2295 /* Prepare the operands for an scc instruction; make sure that the
2296 compare has been done and the result is in T_REG. */
2297 void
2298 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2300 rtx t_reg = get_t_reg_rtx ();
2301 enum rtx_code oldcode = code;
2302 enum machine_mode mode;
2304 /* First need a compare insn. */
2305 switch (code)
2307 case NE:
2308 /* It isn't possible to handle this case. */
2309 gcc_unreachable ();
2310 case LT:
2311 code = GT;
2312 break;
2313 case LE:
2314 code = GE;
2315 break;
2316 case LTU:
2317 code = GTU;
2318 break;
2319 case LEU:
2320 code = GEU;
2321 break;
2322 default:
2323 break;
2325 if (code != oldcode)
2327 rtx tmp = op0;
2328 op0 = op1;
2329 op1 = tmp;
2332 mode = GET_MODE (op0);
2333 if (mode == VOIDmode)
2334 mode = GET_MODE (op1);
2336 op0 = force_reg (mode, op0);
2337 if ((code != EQ && code != NE
2338 && (op1 != const0_rtx
2339 || code == GTU || code == GEU || code == LTU || code == LEU))
2340 || (mode == DImode && op1 != const0_rtx)
2341 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2342 op1 = force_reg (mode, op1);
2344 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2345 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2346 mode);
2350 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2351 rtx op0, rtx op1)
2353 rtx target = gen_reg_rtx (SImode);
2354 rtx tmp;
2356 gcc_assert (TARGET_SHMEDIA);
2357 switch (code)
2359 case EQ:
2360 case GT:
2361 case LT:
2362 case UNORDERED:
2363 case GTU:
2364 case LTU:
2365 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2366 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2367 code = NE;
2368 break;
2370 case NE:
2371 case GE:
2372 case LE:
2373 case ORDERED:
2374 case GEU:
2375 case LEU:
2376 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2377 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2378 code = EQ;
2379 break;
2381 case UNEQ:
2382 case UNGE:
2383 case UNGT:
2384 case UNLE:
2385 case UNLT:
2386 case LTGT:
2387 return NULL_RTX;
2389 default:
2390 gcc_unreachable ();
2393 if (mode == DImode)
2395 rtx t2 = gen_reg_rtx (DImode);
2396 emit_insn (gen_extendsidi2 (t2, target));
2397 target = t2;
2400 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2403 /* Called from the md file, set up the operands of a compare instruction. */
2404 void
2405 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2407 enum rtx_code code = GET_CODE (operands[0]);
2408 enum rtx_code branch_code;
2409 rtx op0 = operands[1];
2410 rtx op1 = operands[2];
2411 rtx insn, tem;
2412 bool need_ccmpeq = false;
2414 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2416 op0 = force_reg (mode, op0);
2417 op1 = force_reg (mode, op1);
2419 else
2421 if (code != EQ || mode == DImode)
2423 /* Force args into regs, since we can't use constants here. */
2424 op0 = force_reg (mode, op0);
2425 if (op1 != const0_rtx || code == GTU || code == GEU)
2426 op1 = force_reg (mode, op1);
2430 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2432 if (code == LT
2433 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2434 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2436 tem = op0, op0 = op1, op1 = tem;
2437 code = swap_condition (code);
2440 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2441 if (code == GE)
2443 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2444 need_ccmpeq = true;
2445 code = GT;
2448 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2449 to EQ/GT respectively. */
2450 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2453 switch (code)
2455 case EQ:
2456 case GT:
2457 case GE:
2458 case GTU:
2459 case GEU:
2460 branch_code = code;
2461 break;
2462 case NE:
2463 case LT:
2464 case LE:
2465 case LTU:
2466 case LEU:
2467 branch_code = reverse_condition (code);
2468 break;
2469 default:
2470 gcc_unreachable ();
2473 insn = gen_rtx_SET (VOIDmode,
2474 get_t_reg_rtx (),
2475 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2477 sh_emit_set_t_insn (insn, mode);
2478 if (need_ccmpeq)
2479 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2481 if (branch_code == code)
2482 emit_jump_insn (gen_branch_true (operands[3]));
2483 else
2484 emit_jump_insn (gen_branch_false (operands[3]));
2487 void
2488 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2490 enum rtx_code code = GET_CODE (operands[1]);
2491 rtx op0 = operands[2];
2492 rtx op1 = operands[3];
2493 rtx lab = NULL_RTX;
2494 bool invert = false;
2495 rtx tem;
2497 op0 = force_reg (mode, op0);
2498 if ((code != EQ && code != NE
2499 && (op1 != const0_rtx
2500 || code == GTU || code == GEU || code == LTU || code == LEU))
2501 || (mode == DImode && op1 != const0_rtx)
2502 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2503 op1 = force_reg (mode, op1);
2505 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2507 if (code == LT || code == LE)
2509 code = swap_condition (code);
2510 tem = op0, op0 = op1, op1 = tem;
2512 if (code == GE)
2514 if (TARGET_IEEE)
2516 lab = gen_label_rtx ();
2517 sh_emit_scc_to_t (EQ, op0, op1);
2518 emit_jump_insn (gen_branch_true (lab));
2519 code = GT;
2521 else
2523 code = LT;
2524 invert = true;
2529 if (code == NE)
2531 code = EQ;
2532 invert = true;
2535 sh_emit_scc_to_t (code, op0, op1);
2536 if (lab)
2537 emit_label (lab);
2538 if (invert)
2539 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2540 else
2541 emit_move_insn (operands[0], get_t_reg_rtx ());
2544 /* Functions to output assembly code. */
2546 /* Return a sequence of instructions to perform DI or DF move.
2548 Since the SH cannot move a DI or DF in one instruction, we have
2549 to take care when we see overlapping source and dest registers. */
2550 const char *
2551 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2552 enum machine_mode mode)
2554 rtx dst = operands[0];
2555 rtx src = operands[1];
2557 if (MEM_P (dst)
2558 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2559 return "mov.l %T1,%0" "\n"
2560 " mov.l %1,%0";
2562 if (register_operand (dst, mode)
2563 && register_operand (src, mode))
2565 if (REGNO (src) == MACH_REG)
2566 return "sts mach,%S0" "\n"
2567 " sts macl,%R0";
2569 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2570 when mov.d r1,r0 do r1->r0 then r2->r1. */
2571 if (REGNO (src) + 1 == REGNO (dst))
2572 return "mov %T1,%T0" "\n"
2573 " mov %1,%0";
2574 else
2575 return "mov %1,%0" "\n"
2576 " mov %T1,%T0";
2578 else if (CONST_INT_P (src))
2580 if (INTVAL (src) < 0)
2581 output_asm_insn ("mov #-1,%S0", operands);
2582 else
2583 output_asm_insn ("mov #0,%S0", operands);
2585 return "mov %1,%R0";
2587 else if (MEM_P (src))
2589 int ptrreg = -1;
2590 int dreg = REGNO (dst);
2591 rtx inside = XEXP (src, 0);
2593 switch (GET_CODE (inside))
2595 case REG:
2596 ptrreg = REGNO (inside);
2597 break;
2599 case SUBREG:
2600 ptrreg = subreg_regno (inside);
2601 break;
2603 case PLUS:
2604 ptrreg = REGNO (XEXP (inside, 0));
2605 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2606 an offsettable address. Unfortunately, offsettable addresses use
2607 QImode to check the offset, and a QImode offsettable address
2608 requires r0 for the other operand, which is not currently
2609 supported, so we can't use the 'o' constraint.
2610 Thus we must check for and handle r0+REG addresses here.
2611 We punt for now, since this is likely very rare. */
2612 gcc_assert (!REG_P (XEXP (inside, 1)));
2613 break;
2615 case LABEL_REF:
2616 return "mov.l %1,%0" "\n"
2617 " mov.l %1+4,%T0";
2618 case POST_INC:
2619 return "mov.l %1,%0" "\n"
2620 " mov.l %1,%T0";
2621 default:
2622 gcc_unreachable ();
2625 /* Work out the safe way to copy. Copy into the second half first. */
2626 if (dreg == ptrreg)
2627 return "mov.l %T1,%T0" "\n"
2628 " mov.l %1,%0";
2631 return "mov.l %1,%0" "\n"
2632 " mov.l %T1,%T0";
2635 /* Print an instruction which would have gone into a delay slot after
2636 another instruction, but couldn't because the other instruction expanded
2637 into a sequence where putting the slot insn at the end wouldn't work. */
2638 static void
2639 print_slot (rtx insn)
2641 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2643 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2646 const char *
2647 output_far_jump (rtx insn, rtx op)
2649 struct { rtx lab, reg, op; } this_jmp;
2650 rtx braf_base_lab = NULL_RTX;
2651 const char *jump;
2652 int far;
2653 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2654 rtx prev;
2656 this_jmp.lab = gen_label_rtx ();
2658 if (TARGET_SH2
2659 && offset >= -32764
2660 && offset - get_attr_length (insn) <= 32766)
2662 far = 0;
2663 jump = "mov.w %O0,%1" "\n"
2664 " braf %1";
2666 else
2668 far = 1;
2669 if (flag_pic)
2671 if (TARGET_SH2)
2672 jump = "mov.l %O0,%1" "\n"
2673 " braf %1";
2674 else
2675 jump = "mov.l r0,@-r15" "\n"
2676 " mova %O0,r0" "\n"
2677 " mov.l @r0,%1" "\n"
2678 " add r0,%1" "\n"
2679 " mov.l @r15+,r0" "\n"
2680 " jmp @%1";
2682 else
2683 jump = "mov.l %O0,%1" "\n"
2684 " jmp @%1";
2686 /* If we have a scratch register available, use it. */
2687 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2688 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2690 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2691 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2692 jump = "mov.l r1,@-r15" "\n"
2693 " mova %O0,r0" "\n"
2694 " mov.l @r0,r1" "\n"
2695 " add r1,r0" "\n"
2696 " mov.l @r15+,r1" "\n"
2697 " jmp @%1";
2698 output_asm_insn (jump, &this_jmp.lab);
2699 if (dbr_sequence_length ())
2700 print_slot (final_sequence);
2701 else
2702 output_asm_insn ("nop", 0);
2704 else
2706 /* Output the delay slot insn first if any. */
2707 if (dbr_sequence_length ())
2708 print_slot (final_sequence);
2710 this_jmp.reg = gen_rtx_REG (SImode, 13);
2711 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2712 Fortunately, MACL is fixed and call-clobbered, and we never
2713 need its value across jumps, so save r13 in it instead of on
2714 the stack. */
2715 if (TARGET_SH5)
2716 output_asm_insn ("lds r13,macl", 0);
2717 else
2718 output_asm_insn ("mov.l r13,@-r15", 0);
2719 output_asm_insn (jump, &this_jmp.lab);
2720 if (TARGET_SH5)
2721 output_asm_insn ("sts macl,r13", 0);
2722 else
2723 output_asm_insn ("mov.l @r15+,r13", 0);
2725 if (far && flag_pic && TARGET_SH2)
2727 braf_base_lab = gen_label_rtx ();
2728 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2729 CODE_LABEL_NUMBER (braf_base_lab));
2731 if (far)
2732 output_asm_insn (".align 2", 0);
2733 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2734 this_jmp.op = op;
2735 if (far && flag_pic)
2737 if (TARGET_SH2)
2738 this_jmp.lab = braf_base_lab;
2739 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2741 else
2742 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2743 return "";
2746 /* Local label counter, used for constants in the pool and inside
2747 pattern branches. */
2748 static int lf = 100;
2750 /* Output code for ordinary branches. */
2751 const char *
2752 output_branch (int logic, rtx insn, rtx *operands)
2754 switch (get_attr_length (insn))
2756 case 6:
2757 /* This can happen if filling the delay slot has caused a forward
2758 branch to exceed its range (we could reverse it, but only
2759 when we know we won't overextend other branches; this should
2760 best be handled by relaxation).
2761 It can also happen when other condbranches hoist delay slot insns
2762 from their destinations, thus leading to code size increase.
2763 But the branch will still be in the range -4092..+4098 bytes. */
2764 if (! TARGET_RELAX)
2766 int label = lf++;
2767 /* The call to print_slot will clobber the operands. */
2768 rtx op0 = operands[0];
2770 /* If the instruction in the delay slot is annulled (true), then
2771 there is no delay slot where we can put it now. The only safe
2772 place for it is after the label. final will do that by default. */
2774 if (final_sequence
2775 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2776 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2778 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2779 ASSEMBLER_DIALECT ? "/" : ".", label);
2780 print_slot (final_sequence);
2782 else
2783 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2785 output_asm_insn ("bra\t%l0", &op0);
2786 fprintf (asm_out_file, "\tnop\n");
2787 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2789 return "";
2791 /* When relaxing, handle this like a short branch. The linker
2792 will fix it up if it still doesn't fit after relaxation. */
2793 case 2:
2794 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2796 /* These are for SH2e, in which we have to account for the
2797 extra nop because of the hardware bug in annulled branches. */
2798 case 8:
2799 if (! TARGET_RELAX)
2801 int label = lf++;
2803 gcc_assert (!final_sequence
2804 || !(INSN_ANNULLED_BRANCH_P
2805 (XVECEXP (final_sequence, 0, 0))));
2806 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2807 logic ? "f" : "t",
2808 ASSEMBLER_DIALECT ? "/" : ".", label);
2809 fprintf (asm_out_file, "\tnop\n");
2810 output_asm_insn ("bra\t%l0", operands);
2811 fprintf (asm_out_file, "\tnop\n");
2812 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2814 return "";
2816 /* When relaxing, fall through. */
2817 case 4:
2819 char buffer[10];
2821 sprintf (buffer, "b%s%ss\t%%l0",
2822 logic ? "t" : "f",
2823 ASSEMBLER_DIALECT ? "/" : ".");
2824 output_asm_insn (buffer, &operands[0]);
2825 return "nop";
2828 default:
2829 /* There should be no longer branches now - that would
2830 indicate that something has destroyed the branches set
2831 up in machine_dependent_reorg. */
2832 gcc_unreachable ();
2836 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2837 fill in operands[9] as a label to the successor insn.
2838 We try to use jump threading where possible.
2839 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2840 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2841 follow jmp and bt, if the address is in range. */
2842 const char *
2843 output_branchy_insn (enum rtx_code code, const char *templ,
2844 rtx insn, rtx *operands)
2846 rtx next_insn = NEXT_INSN (insn);
2848 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2850 rtx src = SET_SRC (PATTERN (next_insn));
2851 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2853 /* Following branch not taken */
2854 operands[9] = gen_label_rtx ();
2855 emit_label_after (operands[9], next_insn);
2856 INSN_ADDRESSES_NEW (operands[9],
2857 INSN_ADDRESSES (INSN_UID (next_insn))
2858 + get_attr_length (next_insn));
2859 return templ;
2861 else
2863 int offset = (branch_dest (next_insn)
2864 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2865 if (offset >= -252 && offset <= 258)
2867 if (GET_CODE (src) == IF_THEN_ELSE)
2868 /* branch_true */
2869 src = XEXP (src, 1);
2870 operands[9] = src;
2871 return templ;
2875 operands[9] = gen_label_rtx ();
2876 emit_label_after (operands[9], insn);
2877 INSN_ADDRESSES_NEW (operands[9],
2878 INSN_ADDRESSES (INSN_UID (insn))
2879 + get_attr_length (insn));
2880 return templ;
2883 const char *
2884 output_ieee_ccmpeq (rtx insn, rtx *operands)
2886 return output_branchy_insn (NE, "bt %l9" "\n"
2887 " fcmp/eq %1,%0",
2888 insn, operands);
2891 /* Output the start of the assembler file. */
2892 static void
2893 sh_file_start (void)
2895 default_file_start ();
2897 if (TARGET_ELF)
2898 /* We need to show the text section with the proper
2899 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2900 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2901 will complain. We can teach GAS specifically about the
2902 default attributes for our choice of text section, but
2903 then we would have to change GAS again if/when we change
2904 the text section name. */
2905 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2906 else
2907 /* Switch to the data section so that the coffsem symbol
2908 isn't in the text section. */
2909 switch_to_section (data_section);
2911 if (TARGET_LITTLE_ENDIAN)
2912 fputs ("\t.little\n", asm_out_file);
2914 if (!TARGET_ELF)
2916 if (TARGET_SHCOMPACT)
2917 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2918 else if (TARGET_SHMEDIA)
2919 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2920 TARGET_SHMEDIA64 ? 64 : 32);
2924 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2925 static bool
2926 unspec_caller_rtx_p (rtx pat)
2928 rtx base, offset;
2929 int i;
2931 split_const (pat, &base, &offset);
2932 if (GET_CODE (base) == UNSPEC)
2934 if (XINT (base, 1) == UNSPEC_CALLER)
2935 return true;
2936 for (i = 0; i < XVECLEN (base, 0); i++)
2937 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2938 return true;
2940 return false;
2943 /* Indicate that INSN cannot be duplicated. This is true for insns
2944 that generate a unique label. */
2945 static bool
2946 sh_cannot_copy_insn_p (rtx insn)
2948 rtx pat;
2950 if (!reload_completed || !flag_pic)
2951 return false;
2953 if (!NONJUMP_INSN_P (insn))
2954 return false;
2955 if (asm_noperands (insn) >= 0)
2956 return false;
2958 pat = PATTERN (insn);
2959 if (GET_CODE (pat) != SET)
2960 return false;
2961 pat = SET_SRC (pat);
2963 if (unspec_caller_rtx_p (pat))
2964 return true;
2966 return false;
2969 /* Number of instructions used to make an arithmetic right shift by N. */
2970 static const char ashiftrt_insns[] =
2971 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2973 /* Description of a logical left or right shift, when expanded to a sequence
2974 of 1/2/8/16 shifts.
2975 Notice that one bit right shifts clobber the T bit. One bit left shifts
2976 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2977 enum
2979 ASHL_CLOBBERS_T = 1 << 0,
2980 LSHR_CLOBBERS_T = 1 << 1
2983 struct ashl_lshr_sequence
2985 char insn_count;
2986 char amount[6];
2987 char clobbers_t;
2990 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2992 { 0, { 0 }, 0 }, // 0
2993 { 1, { 1 }, LSHR_CLOBBERS_T },
2994 { 1, { 2 }, 0 },
2995 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2996 { 2, { 2, 2 }, 0 }, // 4
2997 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2998 { 3, { 2, 2, 2 }, 0 },
2999 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3000 { 1, { 8 }, 0 }, // 8
3001 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3002 { 2, { 8, 2 }, 0 },
3003 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3004 { 3, { 8, 2, 2 }, 0 }, // 12
3005 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3006 { 3, { 8, -2, 8 }, 0 },
3007 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3008 { 1, { 16 }, 0 }, // 16
3009 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3010 { 2, { 16, 2 }, 0 },
3011 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3012 { 3, { 16, 2, 2 }, 0 }, // 20
3013 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3014 { 3, { 16, -2, 8 }, 0 },
3015 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3016 { 2, { 16, 8 }, 0 }, // 24
3017 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3018 { 3, { 16, 8, 2 }, 0 },
3019 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3020 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3021 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3022 { 3, { 16, -2, 16 }, 0 },
3024 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3025 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3026 However, the shift-and combiner code needs this entry here to be in
3027 terms of real shift insns. */
3028 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
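/* Reading the table above, as an example: the entry for a shift by 14,
   { 3, { 8, -2, 8 }, 0 }, encodes "shift by 8, then by 2 in the opposite
   direction, then by 8 again" (negative amounts reverse the direction);
   three insns, none of which clobbers the T bit.  */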
3031 /* Alternative shift sequences for shift amounts < 16, in which up to the
3032 three highmost bits might be clobbered. This is typically used when
3033 combined with some kind of sign or zero extension. */
3034 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3036 { 0, { 0 }, 0 }, // 0
3037 { 1, { 1 }, LSHR_CLOBBERS_T },
3038 { 1, { 2 }, 0 },
3039 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3040 { 2, { 2, 2 }, 0 }, // 4
3041 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3042 { 2, { 8, -2 }, 0 },
3043 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3044 { 1, { 8 }, 0 }, // 8
3045 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3046 { 2, { 8, 2 }, 0 },
3047 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3048 { 3, { 8, 2, 2 }, 0 }, // 12
3049 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3050 { 2, { 16, -2 }, 0 },
3051 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3052 { 1, { 16 }, 0 }, // 16
3053 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3054 { 2, { 16, 2 }, 0 },
3055 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3056 { 3, { 16, 2, 2 }, 0 }, // 20
3057 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3058 { 3, { 16, -2, 8 }, 0 },
3059 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3060 { 2, { 16, 8 }, 0 }, // 24
3061 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3062 { 3, { 16, 8, 2 }, 0 },
3063 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3064 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3065 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3066 { 3, { 16, -2, 16 }, 0 },
3067 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3070 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3071 will clobber the T bit. */
3072 bool
3073 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3075 gcc_assert (CONST_INT_P (shift_amount));
3077 const int shift_amount_i = INTVAL (shift_amount) & 31;
3079 /* Special case for shift count of 31: use and-rotl sequence. */
3080 if (shift_amount_i == 31)
3081 return true;
3083 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3084 & ASHL_CLOBBERS_T) != 0;
3087 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3088 instructions will clobber the T bit. */
3089 bool
3090 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3092 gcc_assert (CONST_INT_P (shift_amount));
3094 const int shift_amount_i = INTVAL (shift_amount) & 31;
3096 /* Special case for shift count of 31: use shll-movt sequence. */
3097 if (shift_amount_i == 31)
3098 return true;
3100 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3101 & LSHR_CLOBBERS_T) != 0;
3104 /* Return true if it is potentially beneficial to use a dynamic shift
3105 instruction (shad / shar) instead of a combination of 1/2/8/16
3106 shift instructions for the specified shift count.
3107 If dynamic shifts are not available, always return false. */
3108 bool
3109 sh_dynamicalize_shift_p (rtx count)
3111 gcc_assert (CONST_INT_P (count));
3113 const int shift_amount_i = INTVAL (count) & 31;
3114 int insn_count;
3116 /* For left and right shifts, there are shorter 2 insn sequences for
3117 shift amounts of 31. */
3118 if (shift_amount_i == 31)
3119 insn_count = 2;
3120 else
3121 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3123 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
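/* A worked example of the test above: a constant shift by 13 has an
   ashl_lshr_seq insn_count of 4 ({ 8, 2, 1, 2 }), so whenever
   SH_DYNAMIC_SHIFT_COST is 1 or 2 the dynamic shad/shld form is preferred,
   provided TARGET_DYNSHIFT is available.  */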
3126 /* Assuming we have a value that has been sign-extended by at least one bit,
3127 can we use the ext_shift_amounts with the last shift turned to an
3128 arithmetic shift to shift it by N without data loss, and quicker than by
3129 other means? */
3130 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
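/* Note: (n | 8) == 15 holds only for n == 7 and n == 15.  In both cases the
   corresponding ext_ashl_lshr_seq entry ends in a one-bit right shift
   ({ 8, -1 } and { 16, -1 } respectively), which is the shift that can be
   turned into an arithmetic shift as described above.  */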
3132 /* Return the cost of a shift. */
3133 static inline int
3134 shiftcosts (rtx x)
3136 int value;
3138 if (TARGET_SHMEDIA)
3139 return 1;
3141 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3143 if (GET_MODE (x) == DImode
3144 && CONST_INT_P (XEXP (x, 1))
3145 && INTVAL (XEXP (x, 1)) == 1)
3146 return 2;
3148 /* Everything else is invalid, because there is no pattern for it. */
3149 return -1;
3151 /* If shifting by a non-constant, this will be expensive. */
3152 if (!CONST_INT_P (XEXP (x, 1)))
3153 return SH_DYNAMIC_SHIFT_COST;
3155 /* Otherwise, return the true cost in instructions. Cope with out of range
3156 shift counts more or less arbitrarily. */
3157 value = INTVAL (XEXP (x, 1)) & 31;
3159 if (GET_CODE (x) == ASHIFTRT)
3161 int cost = ashiftrt_insns[value];
3162 /* If dynamic shifts are available and profitable in this case, then we
3163 put the constant in a reg and use shad. */
3164 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3165 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3166 return cost;
3168 else
3169 return ashl_lshr_seq[value].insn_count;
3172 /* Return the cost of an AND/XOR/IOR operation. */
3173 static inline int
3174 and_xor_ior_costs (rtx x, int code)
3176 /* On SH1-4 we have only max. SImode operations.
3177 Double the cost for modes > SImode. */
3178 const int cost_scale = !TARGET_SHMEDIA
3179 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3180 ? 2 : 1;
3182 /* A logical operation with two registers is a single cycle
3183 instruction. */
3184 if (!CONST_INT_P (XEXP (x, 1)))
3185 return 1 * cost_scale;
3187 int i = INTVAL (XEXP (x, 1));
3189 if (TARGET_SHMEDIA)
3191 if (satisfies_constraint_I10 (XEXP (x, 1))
3192 || satisfies_constraint_J16 (XEXP (x, 1)))
3193 return 1;
3194 else
3195 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3198 /* These constants are single cycle extu.[bw] instructions. */
3199 if ((i == 0xff || i == 0xffff) && code == AND)
3200 return 1 * cost_scale;
3201 /* Constants that can be used in an instruction as an immediate are
3202 a single cycle, but this requires r0, so make it a little more
3203 expensive. */
3204 if (CONST_OK_FOR_K08 (i))
3205 return 2 * cost_scale;
3206 /* Constants that can be loaded with a mov immediate need one more cycle.
3207 This case is probably unnecessary. */
3208 if (CONST_OK_FOR_I08 (i))
3209 return 2 * cost_scale;
3210 /* Any other constant requires an additional 2 cycle pc-relative load.
3211 This case is probably unnecessary. */
3212 return 3 * cost_scale;
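/* Illustrative SImode examples of the costs above (non-SHmedia):
   "x & 0xff" is a single extu.b, cost 1; a K08 constant such as 0xc0
   needs r0, cost 2; any other constant implies a constant pool load
   first, cost 3.  */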
3215 /* Return the cost of an addition or a subtraction. */
3216 static inline int
3217 addsubcosts (rtx x)
3219 if (GET_MODE (x) == SImode)
3221 /* The addc or subc patterns will eventually become one or two
3222 instructions. Below are some costs for some of the patterns
3223 which combine would reject because the costs of the individual
3224 insns in the patterns are lower.
3226 FIXME: It would be much easier if we had something like insn cost
3227 attributes and the cost calculation machinery used those attributes
3228 in the first place. This would eliminate redundant recog-like C
3229 code to calculate costs of complex patterns. */
3230 rtx op0 = XEXP (x, 0);
3231 rtx op1 = XEXP (x, 1);
3233 if (GET_CODE (x) == PLUS)
3235 if (GET_CODE (op0) == AND
3236 && XEXP (op0, 1) == const1_rtx
3237 && (GET_CODE (op1) == PLUS
3238 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3239 return 1;
3241 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3242 && GET_CODE (op1) == LSHIFTRT
3243 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3244 return 1;
3248 /* On SH1-4 we have only max. SImode operations.
3249 Double the cost for modes > SImode. */
3250 const int cost_scale = !TARGET_SHMEDIA
3251 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3252 ? 2 : 1;
3254 /* Adding a register is a single cycle insn. */
3255 if (REG_P (XEXP (x, 1))
3256 || GET_CODE (XEXP (x, 1)) == SUBREG)
3257 return 1 * cost_scale;
3259 /* Likewise for small constants. */
3260 if (CONST_INT_P (XEXP (x, 1))
3261 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3262 return 1 * cost_scale;
3264 if (TARGET_SHMEDIA)
3265 switch (GET_CODE (XEXP (x, 1)))
3267 case CONST:
3268 case LABEL_REF:
3269 case SYMBOL_REF:
3270 return TARGET_SHMEDIA64 ? 5 : 3;
3272 case CONST_INT:
3273 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3274 return 2;
3275 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3276 return 3;
3277 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3278 return 4;
3280 /* Fall through. */
3281 default:
3282 return 5;
3285 /* Any other constant requires a 2 cycle pc-relative load plus an
3286 addition. */
3287 return 3 * cost_scale;
3290 /* Return the cost of a multiply. */
3291 static inline int
3292 multcosts (rtx x ATTRIBUTE_UNUSED)
3294 if (sh_multcost >= 0)
3295 return sh_multcost;
3296 if (TARGET_SHMEDIA)
3297 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3298 accept constants. Ideally, we would use a cost of one or two and
3299 add the cost of the operand, but disregard the latter when inside loops
3300 and loop invariant code motion is still to follow.
3301 Using a multiply first and splitting it later if it's a loss
3302 doesn't work because of different sign / zero extension semantics
3303 of multiplies vs. shifts. */
3304 return optimize_size ? 2 : 3;
3306 if (TARGET_SH2)
3308 /* We have a mul insn, so we can never take more than the mul and the
3309 read of the mac reg, but count more because of the latency and extra
3310 reg usage. */
3311 if (optimize_size)
3312 return 2;
3313 return 3;
3316 /* If we're aiming at small code, then just count the number of
3317 insns in a multiply call sequence. */
3318 if (optimize_size)
3319 return 5;
3321 /* Otherwise count all the insns in the routine we'd be calling too. */
3322 return 20;
3325 /* Compute a (partial) cost for rtx X. Return true if the complete
3326 cost has been computed, and false if subexpressions should be
3327 scanned. In either case, *TOTAL contains the cost result. */
3328 static bool
3329 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3330 int *total, bool speed ATTRIBUTE_UNUSED)
3332 switch (code)
3334 /* The lower-subreg pass decides whether to split multi-word regs
3335 into individual regs by looking at the cost for a SET of certain
3336 modes with the following patterns:
3337 (set (reg) (reg))
3338 (set (reg) (const_int 0))
3339 On machines that support vector-move operations a multi-word move
3340 is the same cost as individual reg move. On SH there is no
3341 vector-move, so we have to provide the correct cost in the number
3342 of move insns to load/store the reg of the mode in question. */
3343 case SET:
3344 if (register_operand (SET_DEST (x), VOIDmode)
3345 && (register_operand (SET_SRC (x), VOIDmode)
3346 || satisfies_constraint_Z (SET_SRC (x))))
3348 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3349 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3350 / mov_insn_size (mode, TARGET_SH2A));
3351 return true;
3353 return false;
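/* For example, a DImode reg-reg copy is split into two SImode moves, so the
   SET cost above works out as GET_MODE_SIZE (DImode) / mov_insn_size
   = 8 / 4 = 2 insns, i.e. COSTS_N_INSNS (2).  */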
3355 /* The cost of a mem access is mainly the cost of the address mode. */
3356 case MEM:
3357 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3358 true);
3359 return true;
3361 /* The cost of a sign or zero extend depends on whether the source is a
3362 reg or a mem. In case of a mem, take the address into account. */
3363 case SIGN_EXTEND:
3364 if (REG_P (XEXP (x, 0)))
3366 *total = COSTS_N_INSNS (1);
3367 return true;
3369 if (MEM_P (XEXP (x, 0)))
3371 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3372 GET_MODE (XEXP (x, 0)),
3373 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3374 return true;
3376 return false;
3378 case ZERO_EXTEND:
3379 if (REG_P (XEXP (x, 0)))
3381 *total = COSTS_N_INSNS (1);
3382 return true;
3384 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3385 && (GET_MODE (XEXP (x, 0)) == QImode
3386 || GET_MODE (XEXP (x, 0)) == HImode))
3388 /* Handle SH2A's movu.b and movu.w insn. */
3389 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3390 GET_MODE (XEXP (x, 0)),
3391 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3392 return true;
3394 return false;
3396 /* mems for SFmode and DFmode can be inside a parallel due to
3397 the way the fpscr is handled. */
3398 case PARALLEL:
3399 for (int i = 0; i < XVECLEN (x, 0); i++)
3401 rtx xx = XVECEXP (x, 0, i);
3402 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3404 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3405 GET_MODE (XEXP (xx, 0)),
3406 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3407 return true;
3409 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3411 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3412 GET_MODE (XEXP (xx, 1)),
3413 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3414 return true;
3418 if (sh_1el_vec (x, VOIDmode))
3419 *total = outer_code != SET;
3420 else if (sh_rep_vec (x, VOIDmode))
3421 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3422 + (outer_code != SET));
3423 else
3424 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3425 return true;
3427 case CONST_INT:
3428 if (TARGET_SHMEDIA)
3430 if (INTVAL (x) == 0)
3431 *total = 0;
3432 else if (outer_code == AND && and_operand ((x), DImode))
3433 *total = 0;
3434 else if ((outer_code == IOR || outer_code == XOR
3435 || outer_code == PLUS)
3436 && CONST_OK_FOR_I10 (INTVAL (x)))
3437 *total = 0;
3438 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3439 *total = COSTS_N_INSNS (outer_code != SET);
3440 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3441 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3442 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3443 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3444 else
3445 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3446 return true;
3448 if (CONST_OK_FOR_I08 (INTVAL (x)))
3449 *total = 0;
3450 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3451 && CONST_OK_FOR_K08 (INTVAL (x)))
3452 *total = 1;
3453 /* prepare_cmp_insn will force costly constants into registers before
3454 the cbranch[sd]i4 patterns can see them, so preserve potentially
3455 interesting ones not covered by I08 above. */
3456 else if (outer_code == COMPARE
3457 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3458 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3459 || INTVAL (x) == 0x7fffffff
3460 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3461 *total = 1;
3462 else
3463 *total = 8;
3464 return true;
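/* A few examples of the non-SHmedia constant costs above: #100 satisfies
   I08 and is free; 0xc0 as an AND/IOR/XOR operand satisfies K08, cost 1;
   something like 0x12345 needs a constant pool load and is costed at 8.  */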
3466 case EQ:
3467 /* An and with a constant compared against zero is
3468 most likely going to be a TST #imm, R0 instruction.
3469 Notice that this does not catch the zero_extract variants from
3470 the md file. */
3471 if (GET_CODE (XEXP (x, 0)) == AND
3472 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3474 *total = 1;
3475 return true;
3477 else
3478 return false;
3480 case SMIN:
3481 case SMAX:
3482 /* This is most likely a clips.b or clips.w insn that is being made up
3483 by combine. */
3484 if (TARGET_SH2A
3485 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3486 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3487 && REG_P (XEXP (XEXP (x, 0), 0))
3488 && CONST_INT_P (XEXP (x, 1)))
3490 *total = COSTS_N_INSNS (1);
3491 return true;
3493 else
3494 return false;
3496 case CONST:
3497 case LABEL_REF:
3498 case SYMBOL_REF:
3499 if (TARGET_SHMEDIA64)
3500 *total = COSTS_N_INSNS (4);
3501 else if (TARGET_SHMEDIA32)
3502 *total = COSTS_N_INSNS (2);
3503 else
3504 *total = 5;
3505 return true;
3507 case CONST_DOUBLE:
3508 if (TARGET_SHMEDIA)
3509 *total = COSTS_N_INSNS (4);
3510 /* prepare_cmp_insn will force costly constants into registers before
3511 the cbranchdi4 pattern can see them, so preserve potentially
3512 interesting ones. */
3513 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3514 *total = 1;
3515 else
3516 *total = 10;
3517 return true;
3519 case CONST_VECTOR:
3520 /* FIXME: This looks broken. Only the last statement has any effect.
3521 Probably this could be folded with the PARALLEL case? */
3522 if (x == CONST0_RTX (GET_MODE (x)))
3523 *total = 0;
3524 else if (sh_1el_vec (x, VOIDmode))
3525 *total = outer_code != SET;
3526 if (sh_rep_vec (x, VOIDmode))
3527 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3528 + (outer_code != SET));
3529 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3530 return true;
3532 case PLUS:
3533 case MINUS:
3534 *total = COSTS_N_INSNS (addsubcosts (x));
3535 return true;
3537 case AND:
3538 case XOR:
3539 case IOR:
3540 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3541 return true;
3543 case MULT:
3544 *total = COSTS_N_INSNS (multcosts (x));
3545 return true;
3547 case LT:
3548 case GE:
3549 /* div0s sign comparison. */
3550 if (GET_CODE (XEXP (x, 0)) == XOR
3551 && REG_P ((XEXP (XEXP (x, 0), 0)))
3552 && REG_P ((XEXP (XEXP (x, 0), 1)))
3553 && satisfies_constraint_Z (XEXP (x, 1)))
3555 *total = COSTS_N_INSNS (1);
3556 return true;
3558 else
3559 return false;
3561 case LSHIFTRT:
3562 /* div0s sign comparison. */
3563 if (GET_CODE (XEXP (x, 0)) == XOR
3564 && REG_P ((XEXP (XEXP (x, 0), 0)))
3565 && REG_P ((XEXP (XEXP (x, 0), 1)))
3566 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3568 *total = COSTS_N_INSNS (1);
3569 return true;
3571 /* Fall through to shiftcosts. */
3572 case ASHIFT:
3573 case ASHIFTRT:
3575 int cost = shiftcosts (x);
3576 if (cost < 0)
3577 return false;
3578 *total = COSTS_N_INSNS (cost);
3579 return true;
3582 case DIV:
3583 case UDIV:
3584 case MOD:
3585 case UMOD:
3586 *total = COSTS_N_INSNS (20);
3587 return true;
3589 case FLOAT:
3590 case FIX:
3591 *total = 100;
3592 return true;
3594 default:
3595 return false;
3599 /* Determine the size of the fundamental move insn that will be used
3600 for the specified mode. */
3601 static inline int
3602 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3604 const int mode_sz = GET_MODE_SIZE (mode);
3606 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3607 || (TARGET_FMOVD && mode == DFmode))
3608 return mode_sz;
3609 else
3611 /* The max. available mode for actual move insns is SImode.
3612 Larger accesses will be split into multiple loads/stores. */
3613 const int max_mov_sz = GET_MODE_SIZE (SImode);
3614 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3618 /* Determine the maximum possible displacement for a move insn for the
3619 specified mode. */
3621 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3623 /* The 4 byte displacement move insns are the same as the 2 byte
3624 versions but take a 12 bit displacement. All we need to do is to
3625 scale the max. displacement value accordingly. */
3626 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
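/* Worked example: for SImode the 2 byte mov insns reach 15 * 4 = 60 bytes,
   while the 4 byte SH2A variants scale this by 4095 / 15 = 273, giving a
   maximum displacement of 60 * 273 = 16380 bytes.  */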
3628 /* SH2A supports FPU move insns with 12 bit displacements.
3629 Other variants do not support any kind of displacements for
3630 FPU move insns. */
3631 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3632 return 0;
3633 else
3635 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3636 const int mode_sz = GET_MODE_SIZE (mode);
3637 int r = 15 * mov_insn_sz * disp_scale;
3639 /* If the mov insn will be split into multiple loads/stores, the
3640 maximum possible displacement is a bit smaller. */
3641 if (mode_sz > mov_insn_sz)
3642 r -= mode_sz - mov_insn_sz;
3643 return r;
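/* Worked example of the formula above (added for illustration, not part of
   the original sources): for SImode, mov_insn_size is 4, so without SH2A
   scaling the result is 15 * 4 = 60 bytes, and with the SH2A 12 bit
   displacement scaling of 4095 / 15 = 273 it becomes 15 * 4 * 273 = 16380
   bytes.  For a DImode move that gets split into two SImode accesses the
   value is further reduced by mode_sz - mov_insn_sz = 4 bytes.  */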
3647 /* Determine the alignment mask for a move insn of the
3648 specified mode. */
3649 static inline int
3650 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3652 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3653 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3656 /* Return the displacement value of a displacement address. */
3657 HOST_WIDE_INT
3658 sh_disp_addr_displacement (rtx x)
3660 gcc_assert (satisfies_constraint_Sdd (x));
3661 return INTVAL (XEXP (XEXP (x, 0), 1));
3664 /* Compute the cost of an address. */
3665 static int
3666 sh_address_cost (rtx x, enum machine_mode mode,
3667 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3669 /* 'GBR + 0'. Account one more because of R0 restriction. */
3670 if (REG_P (x) && REGNO (x) == GBR_REG)
3671 return 2;
3673 /* Simple reg, post-inc, pre-dec addressing. */
3674 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3675 return 1;
3677 /* 'reg + disp' addressing. */
3678 if (GET_CODE (x) == PLUS
3679 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3681 /* 'GBR + disp'. Account one more because of R0 restriction. */
3682 if (REGNO (XEXP (x, 0)) == GBR_REG
3683 && gbr_displacement (XEXP (x, 1), mode))
3684 return 2;
3686 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3688 if (offset == 0)
3689 return 1;
3691 /* The displacement would fit into a 2 byte move insn.
3692 HImode and QImode loads/stores with displacement put pressure on
3693 R0 which will most likely require another reg copy. Thus account
3694 a higher cost for that. */
3695 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3696 return (mode == HImode || mode == QImode) ? 2 : 1;
3698 /* The displacement would fit into a 4 byte move insn (SH2A). */
3699 if (TARGET_SH2A
3700 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3701 return 2;
3703 /* The displacement is probably out of range and will require extra
3704 calculations. */
3705 return 3;
3708 /* 'reg + reg' addressing. Account a slightly higher cost because of
3709 increased pressure on R0. */
3710 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3711 && ! TARGET_SHMEDIA)
3712 return 3;
3714 /* Not sure what it is - probably expensive. */
3715 return 10;
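/* Rough summary of the cost ladder above (added for illustration only):
     @(0,GBR) or @(disp,GBR)                           -> 2
     Rn, @Rn+, @-Rn, reg + zero displacement           -> 1
     reg + disp within a 2 byte move, SImode/SFmode    -> 1
     reg + disp within a 2 byte move, QImode/HImode    -> 2  (R0 pressure)
     reg + disp only reachable with SH2A 4 byte moves  -> 2
     reg + disp out of range                           -> 3
     reg + reg (R0 pressure)                           -> 3
     anything else                                     -> 10  */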
3718 /* Code to expand a shift. */
3719 static void
3720 gen_ashift (int type, int n, rtx reg)
3722 rtx n_rtx;
3724 /* Negative values here come from the shift_amounts array. */
3725 if (n < 0)
3727 if (type == ASHIFT)
3728 type = LSHIFTRT;
3729 else
3730 type = ASHIFT;
3731 n = -n;
3734 n_rtx = GEN_INT (n);
3735 gcc_assert (satisfies_constraint_P27 (n_rtx));
3737 switch (type)
3739 case ASHIFTRT:
3740 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3741 break;
3742 case LSHIFTRT:
3743 if (n == 1)
3744 emit_insn (gen_shlr (reg, reg));
3745 else
3746 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3747 break;
3748 case ASHIFT:
3749 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3750 break;
3751 default:
3752 gcc_unreachable ();
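/* Example of the sign convention handled above (illustrative only): a call
   such as gen_ashift (ASHIFT, -2, reg) emits a logical right shift by 2,
   because the shift sequence tables encode "shift back" steps as negative
   amounts.  */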
3756 /* Code to expand a HImode shift. */
3757 static void
3758 gen_ashift_hi (int type, int n, rtx reg)
3760 /* Negative values here come from the shift_amounts array. */
3761 if (n < 0)
3763 if (type == ASHIFT)
3764 type = LSHIFTRT;
3765 else
3766 type = ASHIFT;
3767 n = -n;
3770 switch (type)
3772 case ASHIFTRT:
3773 case LSHIFTRT:
3774 /* We don't have HImode right shift operations because using the
3775 ordinary 32 bit shift instructions for that doesn't generate proper
3776 zero/sign extension.
3777 gen_ashift_hi is only called in contexts where we know that the
3778 sign extension works out correctly. */
3780 int offset = 0;
3781 if (GET_CODE (reg) == SUBREG)
3783 offset = SUBREG_BYTE (reg);
3784 reg = SUBREG_REG (reg);
3786 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3787 break;
3789 case ASHIFT:
3790 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3791 break;
3795 /* Output RTL to split a constant shift into its component SH constant
3796 shift instructions. */
3797 void
3798 gen_shifty_op (int code, rtx *operands)
3800 int value = INTVAL (operands[2]);
3801 int max, i;
3803 /* Truncate the shift count in case it is out of bounds. */
3804 value = value & 31;
3806 if (value == 31)
3808 if (code == LSHIFTRT)
3810 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3811 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3812 return;
3814 else if (code == ASHIFT)
3816 /* There is a two instruction sequence for 31 bit left shifts,
3817 but it requires r0. */
3818 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3820 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3821 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3822 return;
3826 else if (value == 0)
3828 /* This can happen even when optimizing, if there were subregs before
3829 reload. Don't output a nop here, as this is never optimized away;
3830 use a no-op move instead. */
3831 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3832 return;
3835 max = ashl_lshr_seq[value].insn_count;
3836 for (i = 0; i < max; i++)
3837 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
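/* Hypothetical example (the exact sequence depends on the ashl_lshr_seq
   table defined elsewhere in this file): the SH constant shift insns only
   accept counts of 1, 2, 8 and 16 (constraint P27), so a left shift by 21
   could be emitted as 16 + 2 + 2 + 1, i.e. four shift insns, while a
   logical right shift by 31 uses the rotl/movt sequence handled above.  */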
3840 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3841 don't matter. */
3842 void
3843 gen_shifty_hi_op (int code, rtx *operands)
3845 int value = INTVAL (operands[2]);
3846 int max, i;
3847 void (*gen_fun) (int, int, rtx);
3849 /* This operation is used by and_shl for SImode values with a few
3850 high bits known to be cleared. */
3851 value &= 31;
3852 if (value == 0)
3854 emit_insn (gen_nop ());
3855 return;
3858 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3859 if (code == ASHIFT)
3861 max = ext_ashl_lshr_seq[value].insn_count;
3862 for (i = 0; i < max; i++)
3863 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3865 else
3866 /* When shifting right, emit the shifts in reverse order, so that
3867 solitary negative values come first. */
3868 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3869 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3872 /* Output RTL for an arithmetic right shift.
3873 ??? Rewrite to use super-optimizer sequences. */
3874 bool
3875 expand_ashiftrt (rtx *operands)
3877 rtx wrk;
3878 char func[18];
3879 int value;
3881 if (TARGET_DYNSHIFT)
3883 if (!CONST_INT_P (operands[2]))
3885 rtx count = copy_to_mode_reg (SImode, operands[2]);
3886 emit_insn (gen_negsi2 (count, count));
3887 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3888 return true;
3890 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3891 > 1 + SH_DYNAMIC_SHIFT_COST)
3893 rtx count
3894 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3895 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3896 return true;
3899 if (!CONST_INT_P (operands[2]))
3900 return false;
3902 value = INTVAL (operands[2]) & 31;
3904 if (value == 31)
3906 /* If we are called from abs expansion, arrange things so that we
3907 can use a single MT instruction that doesn't clobber the source,
3908 if LICM can hoist out the load of the constant zero. */
3909 if (currently_expanding_to_rtl)
3911 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3912 operands[1]));
3913 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3914 return true;
3916 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3917 return true;
3919 else if (value >= 16 && value <= 19)
3921 wrk = gen_reg_rtx (SImode);
3922 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3923 value -= 16;
3924 while (value--)
3925 gen_ashift (ASHIFTRT, 1, wrk);
3926 emit_move_insn (operands[0], wrk);
3927 return true;
3929 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3930 else if (value <= 5)
3932 wrk = gen_reg_rtx (SImode);
3933 emit_move_insn (wrk, operands[1]);
3934 while (value--)
3935 gen_ashift (ASHIFTRT, 1, wrk);
3936 emit_move_insn (operands[0], wrk);
3937 return true;
3940 wrk = gen_reg_rtx (Pmode);
3942 /* Load the value into an arg reg and call a helper. */
3943 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3944 sprintf (func, "__ashiftrt_r4_%d", value);
3945 function_symbol (wrk, func, SFUNC_STATIC);
3946 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3947 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3948 return true;
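/* Strategy summary for expand_ashiftrt (added for clarity; the pattern
   names are the ones used above):
   - dynamic shifts available: negate the count and use ashrsi3_d;
   - count 31: cmpgtsi_t against zero plus mov_neg_si_t while expanding to
     RTL, otherwise ashrsi2_31;
   - count 16..19: ashrsi2_16 followed by (count - 16) single bit shifts,
     e.g. a count of 18 becomes one 16 bit shift plus two 1 bit shifts;
   - count <= 5: an inline sequence of single bit shifts;
   - anything else: a call to a helper such as __ashiftrt_r4_<count>, with
     the value passed and returned in r4.  */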
3951 /* Try to find a good way to implement the combiner pattern
3952 [(set (match_operand:SI 0 "register_operand" "r")
3953 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3954 (match_operand:SI 2 "const_int_operand" "n"))
3955 (match_operand:SI 3 "const_int_operand" "n"))) .
3956 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3957 return 0 for simple right / left or left/right shift combination.
3958 return 1 for a combination of shifts with zero_extend.
3959 return 2 for a combination of shifts with an AND that needs r0.
3960 return 3 for a combination of shifts with an AND that needs an extra
3961 scratch register, when the three highmost bits of the AND mask are clear.
3962 return 4 for a combination of shifts with an AND that needs an extra
3963 scratch register, when any of the three highmost bits of the AND mask
3964 is set.
3965 If ATTRP is set, store an initial right shift width in ATTRP[0],
3966 and the instruction length in ATTRP[1] . These values are not valid
3967 when returning 0.
3968 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3969 shift_amounts for the last shift value that is to be used before the
3970 sign extend. */
3972 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3974 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3975 int left = INTVAL (left_rtx), right;
3976 int best = 0;
3977 int cost, best_cost = 10000;
3978 int best_right = 0, best_len = 0;
3979 int i;
3980 int can_ext;
3982 if (left < 0 || left > 31)
3983 return 0;
3984 if (CONST_INT_P (mask_rtx))
3985 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3986 else
3987 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3988 /* Can this be expressed as a right shift / left shift pair? */
3989 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3990 right = exact_log2 (lsb);
3991 mask2 = ~(mask + lsb - 1);
3992 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3993 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3994 if (! mask2)
3995 best_cost = ashl_lshr_seq[right].insn_count
3996 + ashl_lshr_seq[right + left].insn_count;
3997 /* mask has no trailing zeroes <==> ! right */
3998 else if (! right && mask2 == ~(lsb2 - 1))
4000 int late_right = exact_log2 (lsb2);
4001 best_cost = ashl_lshr_seq[left + late_right].insn_count
4002 + ashl_lshr_seq[late_right].insn_count;
4004 /* Try to use zero extend. */
4005 if (mask2 == ~(lsb2 - 1))
4007 int width, first;
4009 for (width = 8; width <= 16; width += 8)
4011 /* Can we zero-extend right away? */
4012 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4014 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4015 + ext_ashl_lshr_seq[left + right].insn_count;
4016 if (cost < best_cost)
4018 best = 1;
4019 best_cost = cost;
4020 best_right = right;
4021 best_len = cost;
4022 if (attrp)
4023 attrp[2] = -1;
4025 continue;
4027 /* ??? Could try to put zero extend into initial right shift,
4028 or even shift a bit left before the right shift. */
4029 /* Determine value of first part of left shift, to get to the
4030 zero extend cut-off point. */
4031 first = width - exact_log2 (lsb2) + right;
4032 if (first >= 0 && right + left - first >= 0)
4034 cost = ext_ashl_lshr_seq[right].insn_count
4035 + ext_ashl_lshr_seq[first].insn_count + 1
4036 + ext_ashl_lshr_seq[right + left - first].insn_count;
4038 if (cost < best_cost)
4040 best = 1;
4041 best_cost = cost;
4042 best_right = right;
4043 best_len = cost;
4044 if (attrp)
4045 attrp[2] = first;
4050 /* Try to use r0 AND pattern */
4051 for (i = 0; i <= 2; i++)
4053 if (i > right)
4054 break;
4055 if (! CONST_OK_FOR_K08 (mask >> i))
4056 continue;
4057 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4058 if (cost < best_cost)
4060 best = 2;
4061 best_cost = cost;
4062 best_right = i;
4063 best_len = cost - 1;
4066 /* Try to use a scratch register to hold the AND operand. */
4067 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4068 for (i = 0; i <= 2; i++)
4070 if (i > right)
4071 break;
4072 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4073 + (can_ext
4074 ? ext_ashl_lshr_seq
4075 : ashl_lshr_seq)[left + i].insn_count;
4076 if (cost < best_cost)
4078 best = 4 - can_ext;
4079 best_cost = cost;
4080 best_right = i;
4081 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4085 if (attrp)
4087 attrp[0] = best_right;
4088 attrp[1] = best_len;
4090 return best;
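/* Worked example for the mask analysis above (illustrative only): with
   left = 2 and a mask operand of 0x3fc, mask becomes 0x3fc >> 2 = 0xff.
   The expression ((mask ^ (mask - 1)) >> 1) + 1 isolates the lowest set
   bit (it is equivalent to mask & -mask), giving lsb = 1 and right = 0;
   mask2 = ~(mask + lsb - 1) = ~0xff then shows that the mask is a
   contiguous low-order block, so the zero-extend alternatives are
   considered next.  */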
4093 /* This is used in length attributes of the unnamed instructions
4094 corresponding to shl_and_kind return values of 1 and 2. */
4096 shl_and_length (rtx insn)
4098 rtx set_src, left_rtx, mask_rtx;
4099 int attributes[3];
4101 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4102 left_rtx = XEXP (XEXP (set_src, 0), 1);
4103 mask_rtx = XEXP (set_src, 1);
4104 shl_and_kind (left_rtx, mask_rtx, attributes);
4105 return attributes[1];
4108 /* This is used in length attribute of the and_shl_scratch instruction. */
4110 shl_and_scr_length (rtx insn)
4112 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4113 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4114 rtx op = XEXP (set_src, 0);
4115 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4116 op = XEXP (XEXP (op, 0), 0);
4117 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4120 /* Generate rtl for instructions for which shl_and_kind advised a particular
4121 method of generating them, i.e. returned zero. */
4122 bool
4123 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4125 int attributes[3];
4126 unsigned HOST_WIDE_INT mask;
4127 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4128 int right, total_shift;
4129 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4131 right = attributes[0];
4132 total_shift = INTVAL (left_rtx) + right;
4133 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4134 switch (kind)
4136 default:
4137 return true;
4138 case 1:
4140 int first = attributes[2];
4141 rtx operands[3];
4143 if (first < 0)
4145 emit_insn ((mask << right) <= 0xff
4146 ? gen_zero_extendqisi2 (dest,
4147 gen_lowpart (QImode, source))
4148 : gen_zero_extendhisi2 (dest,
4149 gen_lowpart (HImode, source)));
4150 source = dest;
4152 if (source != dest)
4153 emit_insn (gen_movsi (dest, source));
4154 operands[0] = dest;
4155 if (right)
4157 operands[2] = GEN_INT (right);
4158 gen_shifty_hi_op (LSHIFTRT, operands);
4160 if (first > 0)
4162 operands[2] = GEN_INT (first);
4163 gen_shifty_hi_op (ASHIFT, operands);
4164 total_shift -= first;
4165 mask <<= first;
4167 if (first >= 0)
4168 emit_insn (mask <= 0xff
4169 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4170 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4171 if (total_shift > 0)
4173 operands[2] = GEN_INT (total_shift);
4174 gen_shifty_hi_op (ASHIFT, operands);
4176 break;
4178 case 4:
4179 shift_gen_fun = gen_shifty_op;
4180 case 3:
4181 /* If the topmost bit that matters is set, set the topmost bits
4182 that don't matter. This way, we might be able to get a shorter
4183 signed constant. */
4184 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4185 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4186 case 2:
4187 /* Don't expand fine-grained when combining, because that will
4188 make the pattern fail. */
4189 if (currently_expanding_to_rtl
4190 || reload_in_progress || reload_completed)
4192 rtx operands[3];
4194 /* Cases 3 and 4 should be handled by this split
4195 only while combining */
4196 gcc_assert (kind <= 2);
4197 if (right)
4199 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4200 source = dest;
4202 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4203 if (total_shift)
4205 operands[0] = dest;
4206 operands[1] = dest;
4207 operands[2] = GEN_INT (total_shift);
4208 shift_gen_fun (ASHIFT, operands);
4210 break;
4212 else
4214 int neg = 0;
4215 if (kind != 4 && total_shift < 16)
4217 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4218 if (neg > 0)
4219 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4220 else
4221 neg = 0;
4223 emit_insn (gen_and_shl_scratch (dest, source,
4224 GEN_INT (right),
4225 GEN_INT (mask),
4226 GEN_INT (total_shift + neg),
4227 GEN_INT (neg)));
4228 emit_insn (gen_movsi (dest, dest));
4229 break;
4232 return false;
4235 /* Try to find a good way to implement the combiner pattern
4236 [(set (match_operand:SI 0 "register_operand" "=r")
4237 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4238 (match_operand:SI 2 "const_int_operand" "n")
4239 (match_operand:SI 3 "const_int_operand" "n")
4240 (const_int 0)))
4241 (clobber (reg:SI T_REG))]
4242 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4243 return 0 for simple left / right shift combination.
4244 return 1 for left shift / 8 bit sign extend / left shift.
4245 return 2 for left shift / 16 bit sign extend / left shift.
4246 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4247 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4248 return 5 for left shift / 16 bit sign extend / right shift
4249 return 6 for < 8 bit sign extend / left shift.
4250 return 7 for < 8 bit sign extend / left shift / single right shift.
4251 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4253 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4255 int left, size, insize, ext;
4256 int cost = 0, best_cost;
4257 int kind;
4259 left = INTVAL (left_rtx);
4260 size = INTVAL (size_rtx);
4261 insize = size - left;
4262 gcc_assert (insize > 0);
4263 /* Default to left / right shift. */
4264 kind = 0;
4265 best_cost = ashl_lshr_seq[32 - insize].insn_count
4266 + ashl_lshr_seq[32 - size].insn_count;
4267 if (size <= 16)
4269 /* 16 bit shift / sign extend / 16 bit shift */
4270 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4271 + ashl_lshr_seq[16 - size].insn_count;
4272 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4273 below, by alternative 3 or something even better. */
4274 if (cost < best_cost)
4276 kind = 5;
4277 best_cost = cost;
4280 /* Try a plain sign extend between two shifts. */
4281 for (ext = 16; ext >= insize; ext -= 8)
4283 if (ext <= size)
4285 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4286 + ashl_lshr_seq[size - ext].insn_count;
4287 if (cost < best_cost)
4289 kind = ext / (unsigned) 8;
4290 best_cost = cost;
4293 /* Check if we can do a sloppy shift with a final signed shift
4294 restoring the sign. */
4295 if (EXT_SHIFT_SIGNED (size - ext))
4296 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4297 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4298 /* If not, maybe it's still cheaper to do the second shift sloppy,
4299 and do a final sign extend? */
4300 else if (size <= 16)
4301 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4302 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4303 + 1;
4304 else
4305 continue;
4306 if (cost < best_cost)
4308 kind = ext / (unsigned) 8 + 2;
4309 best_cost = cost;
4312 /* Check if we can sign extend in r0 */
4313 if (insize < 8)
4315 cost = 3 + ashl_lshr_seq[left].insn_count;
4316 if (cost < best_cost)
4318 kind = 6;
4319 best_cost = cost;
4321 /* Try the same with a final signed shift. */
4322 if (left < 31)
4324 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4325 if (cost < best_cost)
4327 kind = 7;
4328 best_cost = cost;
4332 if (TARGET_DYNSHIFT)
4334 /* Try to use a dynamic shift. */
4335 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4336 if (cost < best_cost)
4338 kind = 0;
4339 best_cost = cost;
4342 if (costp)
4343 *costp = cost;
4344 return kind;
4347 /* Function to be used in the length attribute of the instructions
4348 implementing this pattern. */
4350 shl_sext_length (rtx insn)
4352 rtx set_src, left_rtx, size_rtx;
4353 int cost;
4355 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4356 left_rtx = XEXP (XEXP (set_src, 0), 1);
4357 size_rtx = XEXP (set_src, 1);
4358 shl_sext_kind (left_rtx, size_rtx, &cost);
4359 return cost;
4362 /* Generate rtl for this pattern */
4363 bool
4364 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4366 int kind;
4367 int left, size, insize, cost;
4368 rtx operands[3];
4370 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4371 left = INTVAL (left_rtx);
4372 size = INTVAL (size_rtx);
4373 insize = size - left;
4374 switch (kind)
4376 case 1:
4377 case 2:
4378 case 3:
4379 case 4:
4381 int ext = kind & 1 ? 8 : 16;
4382 int shift2 = size - ext;
4384 /* Don't expand fine-grained when combining, because that will
4385 make the pattern fail. */
4386 if (! currently_expanding_to_rtl
4387 && ! reload_in_progress && ! reload_completed)
4389 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4390 emit_insn (gen_movsi (dest, source));
4391 break;
4393 if (dest != source)
4394 emit_insn (gen_movsi (dest, source));
4395 operands[0] = dest;
4396 if (ext - insize)
4398 operands[2] = GEN_INT (ext - insize);
4399 gen_shifty_hi_op (ASHIFT, operands);
4401 emit_insn (kind & 1
4402 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4403 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4404 if (kind <= 2)
4406 if (shift2)
4408 operands[2] = GEN_INT (shift2);
4409 gen_shifty_op (ASHIFT, operands);
4412 else
4414 if (shift2 > 0)
4416 if (EXT_SHIFT_SIGNED (shift2))
4418 operands[2] = GEN_INT (shift2 + 1);
4419 gen_shifty_op (ASHIFT, operands);
4420 operands[2] = const1_rtx;
4421 gen_shifty_op (ASHIFTRT, operands);
4422 break;
4424 operands[2] = GEN_INT (shift2);
4425 gen_shifty_hi_op (ASHIFT, operands);
4427 else if (shift2)
4429 operands[2] = GEN_INT (-shift2);
4430 gen_shifty_hi_op (LSHIFTRT, operands);
4432 emit_insn (size <= 8
4433 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4434 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4436 break;
4438 case 5:
4440 int i = 16 - size;
4441 if (! currently_expanding_to_rtl
4442 && ! reload_in_progress && ! reload_completed)
4443 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4444 else
4446 operands[0] = dest;
4447 operands[2] = GEN_INT (16 - insize);
4448 gen_shifty_hi_op (ASHIFT, operands);
4449 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4451 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4452 while (--i >= 0)
4453 gen_ashift (ASHIFTRT, 1, dest);
4454 break;
4456 case 6:
4457 case 7:
4458 /* Don't expand fine-grained when combining, because that will
4459 make the pattern fail. */
4460 if (! currently_expanding_to_rtl
4461 && ! reload_in_progress && ! reload_completed)
4463 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4464 emit_insn (gen_movsi (dest, source));
4465 break;
4467 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4468 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4469 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4470 operands[0] = dest;
4471 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4472 gen_shifty_op (ASHIFT, operands);
4473 if (kind == 7)
4474 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4475 break;
4476 default:
4477 return true;
4479 return false;
4482 /* Prefix a symbol_ref name with "datalabel". */
4484 gen_datalabel_ref (rtx sym)
4486 const char *str;
4488 if (GET_CODE (sym) == LABEL_REF)
4489 return gen_rtx_CONST (GET_MODE (sym),
4490 gen_rtx_UNSPEC (GET_MODE (sym),
4491 gen_rtvec (1, sym),
4492 UNSPEC_DATALABEL));
4494 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4496 str = XSTR (sym, 0);
4497 /* Share all SYMBOL_REF strings with the same value - that is important
4498 for cse. */
4499 str = IDENTIFIER_POINTER (get_identifier (str));
4500 XSTR (sym, 0) = str;
4502 return sym;
4506 static alloc_pool label_ref_list_pool;
4508 typedef struct label_ref_list_d
4510 rtx label;
4511 struct label_ref_list_d *next;
4512 } *label_ref_list_t;
4514 /* The SH cannot load a large constant into a register; constants have to
4515 come from a pc relative load. The reference of a pc relative load
4516 instruction must be less than 1k in front of the instruction. This
4517 means that we often have to dump a constant inside a function, and
4518 generate code to branch around it.
4520 It is important to minimize this, since the branches will slow things
4521 down and make things bigger.
4523 Worst case code looks like:
4525 mov.l L1,rn
4526 bra L2
4528 align
4529 L1: .long value
4533 mov.l L3,rn
4534 bra L4
4536 align
4537 L3: .long value
4541 We fix this by performing a scan before scheduling, which notices which
4542 instructions need to have their operands fetched from the constant table
4543 and builds the table.
4545 The algorithm is:
4547 scan, find an instruction which needs a pcrel move. Look forward, find the
4548 last barrier which is within MAX_COUNT bytes of the requirement.
4549 If there isn't one, make one. Process all the instructions between
4550 the found insn and the barrier.
4552 In the above example, we can tell that L3 is within 1k of L1, so
4553 the first move can be shrunk from the 3 insn+constant sequence into
4554 just 1 insn, and the constant moved to L3 to make:
4556 mov.l L1,rn
4558 mov.l L3,rn
4559 bra L4
4561 align
4562 L3:.long value
4563 L4:.long value
4565 Then the second move becomes the target for the shortening process. */
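/* A minimal sketch of the scan described above (simplified pseudocode; the
   actual implementation is the machine dependent reorg pass further down
   in this file):

     for each insn INSN in the function:
       if mova_p (INSN) or broken_move (INSN):
         barrier = find_barrier (num_mova, mova, INSN);
         for each broken move up to BARRIER:
           label = add_constant (value, mode, last_value);
           rewrite the move as a pc-relative load from LABEL;
         dump_table (..., barrier);  */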
4567 typedef struct
4569 rtx value; /* Value in table. */
4570 rtx label; /* Label of value. */
4571 label_ref_list_t wend; /* End of window. */
4572 enum machine_mode mode; /* Mode of value. */
4574 /* True if this constant is accessed as part of a post-increment
4575 sequence. Note that HImode constants are never accessed in this way. */
4576 bool part_of_sequence_p;
4577 } pool_node;
4579 /* The maximum number of constants that can fit into one pool, since
4580 constants in the range 0..510 are at least 2 bytes long, and in the
4581 range from there to 1018 at least 4 bytes. */
4583 #define MAX_POOL_SIZE 372
4584 static pool_node pool_vector[MAX_POOL_SIZE];
4585 static int pool_size;
4586 static rtx pool_window_label;
4587 static int pool_window_last;
4589 static int max_labelno_before_reorg;
4591 /* ??? If we need a constant in HImode which is the truncated value of a
4592 constant we need in SImode, we could combine the two entries thus saving
4593 two bytes. Is this common enough to be worth the effort of implementing
4594 it? */
4596 /* ??? This stuff should be done at the same time that we shorten branches.
4597 As it is now, we must assume that all branches are the maximum size, and
4598 this causes us to almost always output constant pools sooner than
4599 necessary. */
4601 /* Add a constant to the pool and return its label. */
4602 static rtx
4603 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4605 int i;
4606 rtx lab, new_rtx;
4607 label_ref_list_t ref, newref;
4609 /* First see if we've already got it. */
4610 for (i = 0; i < pool_size; i++)
4612 if (x->code == pool_vector[i].value->code
4613 && mode == pool_vector[i].mode)
4615 if (x->code == CODE_LABEL)
4617 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4618 continue;
4620 if (rtx_equal_p (x, pool_vector[i].value))
4622 lab = new_rtx = 0;
4623 if (! last_value
4624 || ! i
4625 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4627 new_rtx = gen_label_rtx ();
4628 LABEL_REFS (new_rtx) = pool_vector[i].label;
4629 pool_vector[i].label = lab = new_rtx;
4631 if (lab && pool_window_label)
4633 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4634 newref->label = pool_window_label;
4635 ref = pool_vector[pool_window_last].wend;
4636 newref->next = ref;
4637 pool_vector[pool_window_last].wend = newref;
4639 if (new_rtx)
4640 pool_window_label = new_rtx;
4641 pool_window_last = i;
4642 return lab;
4647 /* Need a new one. */
4648 pool_vector[pool_size].value = x;
4649 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4651 lab = 0;
4652 pool_vector[pool_size - 1].part_of_sequence_p = true;
4654 else
4655 lab = gen_label_rtx ();
4656 pool_vector[pool_size].mode = mode;
4657 pool_vector[pool_size].label = lab;
4658 pool_vector[pool_size].wend = NULL;
4659 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4660 if (lab && pool_window_label)
4662 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4663 newref->label = pool_window_label;
4664 ref = pool_vector[pool_window_last].wend;
4665 newref->next = ref;
4666 pool_vector[pool_window_last].wend = newref;
4668 if (lab)
4669 pool_window_label = lab;
4670 pool_window_last = pool_size;
4671 pool_size++;
4672 return lab;
4675 /* Output the literal table. START, if nonzero, is the first instruction
4676 this table is needed for, and also indicates that there is at least one
4677 casesi_worker_2 instruction; we have to emit the operand3 labels from
4678 these insns at a 4-byte aligned position. BARRIER is the barrier
4679 after which we are to place the table. */
4680 static void
4681 dump_table (rtx start, rtx barrier)
4683 rtx scan = barrier;
4684 int i;
4685 bool need_align = true;
4686 rtx lab;
4687 label_ref_list_t ref;
4688 bool have_df = false;
4690 /* Do two passes, first time dump out the HI sized constants. */
4692 for (i = 0; i < pool_size; i++)
4694 pool_node *p = &pool_vector[i];
4696 if (p->mode == HImode)
4698 if (need_align)
4700 scan = emit_insn_after (gen_align_2 (), scan);
4701 need_align = false;
4703 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4704 scan = emit_label_after (lab, scan);
4705 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4706 scan);
4707 for (ref = p->wend; ref; ref = ref->next)
4709 lab = ref->label;
4710 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4713 else if (p->mode == DFmode)
4714 have_df = true;
4717 need_align = true;
4719 if (start)
4721 scan = emit_insn_after (gen_align_4 (), scan);
4722 need_align = false;
4723 for (; start != barrier; start = NEXT_INSN (start))
4724 if (NONJUMP_INSN_P (start)
4725 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4727 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4728 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4730 scan = emit_label_after (lab, scan);
4733 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4735 rtx align_insn = NULL_RTX;
4737 scan = emit_label_after (gen_label_rtx (), scan);
4738 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4739 need_align = false;
4741 for (i = 0; i < pool_size; i++)
4743 pool_node *p = &pool_vector[i];
4745 switch (p->mode)
4747 case HImode:
4748 break;
4749 case SImode:
4750 case SFmode:
4751 if (align_insn && !p->part_of_sequence_p)
4753 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4754 emit_label_before (lab, align_insn);
4755 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4756 align_insn);
4757 for (ref = p->wend; ref; ref = ref->next)
4759 lab = ref->label;
4760 emit_insn_before (gen_consttable_window_end (lab),
4761 align_insn);
4763 delete_insn (align_insn);
4764 align_insn = NULL_RTX;
4765 continue;
4767 else
4769 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4770 scan = emit_label_after (lab, scan);
4771 scan = emit_insn_after (gen_consttable_4 (p->value,
4772 const0_rtx), scan);
4773 need_align = ! need_align;
4775 break;
4776 case DFmode:
4777 if (need_align)
4779 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4780 align_insn = scan;
4781 need_align = false;
4783 case DImode:
4784 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4785 scan = emit_label_after (lab, scan);
4786 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4787 scan);
4788 break;
4789 default:
4790 gcc_unreachable ();
4793 if (p->mode != HImode)
4795 for (ref = p->wend; ref; ref = ref->next)
4797 lab = ref->label;
4798 scan = emit_insn_after (gen_consttable_window_end (lab),
4799 scan);
4804 pool_size = 0;
4807 for (i = 0; i < pool_size; i++)
4809 pool_node *p = &pool_vector[i];
4811 switch (p->mode)
4813 case HImode:
4814 break;
4815 case SImode:
4816 case SFmode:
4817 if (need_align)
4819 need_align = false;
4820 scan = emit_label_after (gen_label_rtx (), scan);
4821 scan = emit_insn_after (gen_align_4 (), scan);
4823 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4824 scan = emit_label_after (lab, scan);
4825 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4826 scan);
4827 break;
4828 case DFmode:
4829 case DImode:
4830 if (need_align)
4832 need_align = false;
4833 scan = emit_label_after (gen_label_rtx (), scan);
4834 scan = emit_insn_after (gen_align_4 (), scan);
4836 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4837 scan = emit_label_after (lab, scan);
4838 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4839 scan);
4840 break;
4841 default:
4842 gcc_unreachable ();
4845 if (p->mode != HImode)
4847 for (ref = p->wend; ref; ref = ref->next)
4849 lab = ref->label;
4850 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4855 scan = emit_insn_after (gen_consttable_end (), scan);
4856 scan = emit_barrier_after (scan);
4857 pool_size = 0;
4858 pool_window_label = NULL_RTX;
4859 pool_window_last = 0;
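/* Rough emission order used by dump_table above (a sketch, not a precise
   description): an align_2 plus the 2 byte (HImode) constants first, then
   align_4 (or align_log 3 when double alignment is in effect) followed by
   the 4 byte (SImode/SFmode) and 8 byte (DFmode/DImode) constants, then
   consttable_end and a barrier.  Each constant is preceded by its label(s)
   and followed by any window-end markers.  */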
4862 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4864 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4866 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4867 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4868 need to fix it if the input value is CONST_OK_FOR_I08. */
4869 static bool
4870 broken_move (rtx insn)
4872 if (NONJUMP_INSN_P (insn))
4874 rtx pat = PATTERN (insn);
4875 if (GET_CODE (pat) == PARALLEL)
4876 pat = XVECEXP (pat, 0, 0);
4877 if (GET_CODE (pat) == SET
4878 /* We can load any 8-bit value if we don't care what the high
4879 order bits end up as. */
4880 && GET_MODE (SET_DEST (pat)) != QImode
4881 && (CONSTANT_P (SET_SRC (pat))
4882 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4883 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4884 /* Match mova_const. */
4885 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4886 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4887 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4888 && ! (TARGET_SH2E
4889 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4890 && (fp_zero_operand (SET_SRC (pat))
4891 || fp_one_operand (SET_SRC (pat)))
4892 /* In general we don't know the current setting of fpscr, so
4893 disable fldi.
4894 There is an exception if this was a register-register move
4895 before reload - and hence it was ascertained that we have
4896 single precision setting - and in a post-reload optimization
4897 we changed this to do a constant load. In that case
4898 we don't have an r0 clobber, hence we must use fldi. */
4899 && (TARGET_FMOVD
4900 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4901 == SCRATCH))
4902 && REG_P (SET_DEST (pat))
4903 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4904 && ! (TARGET_SH2A
4905 && GET_MODE (SET_DEST (pat)) == SImode
4906 && (satisfies_constraint_I20 (SET_SRC (pat))
4907 || satisfies_constraint_I28 (SET_SRC (pat))))
4908 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4909 return true;
4912 return false;
4915 /* Return true if the specified insn is a mova insn. */
4916 static bool
4917 mova_p (rtx insn)
4919 return (NONJUMP_INSN_P (insn)
4920 && GET_CODE (PATTERN (insn)) == SET
4921 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4922 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4923 /* Don't match mova_const. */
4924 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4927 /* Fix up a mova from a switch that went out of range. */
4928 static void
4929 fixup_mova (rtx mova)
4931 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4932 if (! flag_pic)
4934 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4935 INSN_CODE (mova) = -1;
4937 else
4939 rtx worker = mova;
4940 rtx lab = gen_label_rtx ();
4941 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4945 worker = NEXT_INSN (worker);
4946 gcc_assert (worker
4947 && !LABEL_P (worker)
4948 && !JUMP_P (worker));
4949 } while (NOTE_P (worker)
4950 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4951 wpat = PATTERN (worker);
4952 wpat0 = XVECEXP (wpat, 0, 0);
4953 wpat1 = XVECEXP (wpat, 0, 1);
4954 wsrc = SET_SRC (wpat0);
4955 PATTERN (worker) = (gen_casesi_worker_2
4956 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4957 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4958 XEXP (wpat1, 0)));
4959 INSN_CODE (worker) = -1;
4960 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4961 base = gen_rtx_LABEL_REF (Pmode, lab);
4962 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4963 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4964 INSN_CODE (mova) = -1;
4968 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4969 *num_mova, and check if the new mova is not nested within the first one.
4970 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4971 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4972 static int
4973 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4975 int n_addr = 0; /* Initialization to shut up spurious warning. */
4976 int f_target, n_target = 0; /* Likewise. */
4978 if (optimize)
4980 /* If NEW_MOVA has no address yet, it will be handled later. */
4981 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4982 return -1;
4984 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4985 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4986 if (n_addr > n_target || n_addr + 1022 < n_target)
4988 /* Change the mova into a load.
4989 broken_move will then return true for it. */
4990 fixup_mova (new_mova);
4991 return 1;
4994 if (!(*num_mova)++)
4996 *first_mova = new_mova;
4997 return 2;
4999 if (!optimize
5000 || ((f_target
5001 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5002 >= n_target))
5003 return -1;
5005 (*num_mova)--;
5006 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5007 > n_target - n_addr)
5009 fixup_mova (*first_mova);
5010 return 0;
5012 else
5014 fixup_mova (new_mova);
5015 return 1;
5019 /* Find the last barrier from insn FROM which is close enough to hold the
5020 constant pool. If we can't find one, then create one near the end of
5021 the range. */
5022 static rtx
5023 find_barrier (int num_mova, rtx mova, rtx from)
5025 int count_si = 0;
5026 int count_hi = 0;
5027 int found_hi = 0;
5028 int found_si = 0;
5029 int found_di = 0;
5030 int hi_align = 2;
5031 int si_align = 2;
5032 int leading_mova = num_mova;
5033 rtx barrier_before_mova = NULL_RTX;
5034 rtx found_barrier = NULL_RTX;
5035 rtx good_barrier = NULL_RTX;
5036 int si_limit;
5037 int hi_limit;
5038 rtx orig = from;
5039 rtx last_got = NULL_RTX;
5040 rtx last_symoff = NULL_RTX;
5042 /* For HImode: range is 510, add 4 because pc counts from address of
5043 second instruction after this one, subtract 2 for the jump instruction
5044 that we may need to emit before the table, subtract 2 for the instruction
5045 that fills the jump delay slot (in very rare cases, reorg will take an
5046 instruction from after the constant pool or will leave the delay slot
5047 empty). This gives 510.
5048 For SImode: range is 1020, add 4 because pc counts from address of
5049 second instruction after this one, subtract 2 in case pc is 2 byte
5050 aligned, subtract 2 for the jump instruction that we may need to emit
5051 before the table, subtract 2 for the instruction that fills the jump
5052 delay slot. This gives 1018. */
5054 /* The branch will always be shortened now that the reference address for
5055 forward branches is the successor address, thus we need no longer make
5056 adjustments to the [sh]i_limit for -O0. */
5058 si_limit = 1018;
5059 hi_limit = 510;
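/* Restating the range comment above as arithmetic:
     HImode:  510 + 4 - 2 - 2     = 510
     SImode: 1020 + 4 - 2 - 2 - 2 = 1018  */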
5061 while (from && count_si < si_limit && count_hi < hi_limit)
5063 int inc = get_attr_length (from);
5064 int new_align = 1;
5066 /* If this is a label that existed at the time of the compute_alignments
5067 call, determine the alignment. N.B. When find_barrier recurses for
5068 an out-of-reach mova, we might see labels at the start of previously
5069 inserted constant tables. */
5070 if (LABEL_P (from)
5071 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5073 if (optimize)
5074 new_align = 1 << label_to_alignment (from);
5075 else if (BARRIER_P (prev_nonnote_insn (from)))
5076 new_align = 1 << barrier_align (from);
5077 else
5078 new_align = 1;
5079 inc = 0;
5081 /* In case we are scanning a constant table because of recursion, check
5082 for explicit alignments. If the table is long, we might be forced
5083 to emit the new table in front of it; the length of the alignment
5084 might be the last straw. */
5085 else if (NONJUMP_INSN_P (from)
5086 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5087 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5088 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5089 /* When we find the end of a constant table, paste the new constant
5090 at the end. That is better than putting it in front because
5091 this way, we don't need extra alignment for adding a 4-byte-aligned
5092 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5093 else if (NONJUMP_INSN_P (from)
5094 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5095 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5096 return from;
5098 if (BARRIER_P (from))
5100 rtx next;
5102 found_barrier = from;
5104 /* If we are at the end of the function, or in front of an alignment
5105 instruction, we need not insert an extra alignment. We prefer
5106 this kind of barrier. */
5107 if (barrier_align (from) > 2)
5108 good_barrier = from;
5110 /* If we are at the end of a hot/cold block, dump the constants
5111 here. */
5112 next = NEXT_INSN (from);
5113 if (next
5114 && NOTE_P (next)
5115 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5116 break;
5119 if (broken_move (from))
5121 rtx pat, src, dst;
5122 enum machine_mode mode;
5124 pat = PATTERN (from);
5125 if (GET_CODE (pat) == PARALLEL)
5126 pat = XVECEXP (pat, 0, 0);
5127 src = SET_SRC (pat);
5128 dst = SET_DEST (pat);
5129 mode = GET_MODE (dst);
5131 /* A GOT pc-relative setting comes in a pair of
5132 mova .L8,r0
5133 mov.l .L8,r12
5134 instructions (plus an add r0,r12).
5135 Remember if we see one without the other. */
5136 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5137 last_got = last_got ? NULL_RTX : from;
5138 else if (PIC_ADDR_P (src))
5139 last_got = last_got ? NULL_RTX : from;
5141 /* We must explicitly check the mode, because sometimes the
5142 front end will generate code to load unsigned constants into
5143 HImode targets without properly sign extending them. */
5144 if (mode == HImode
5145 || (mode == SImode && satisfies_constraint_I16 (src)
5146 && REGNO (dst) != FPUL_REG))
5148 found_hi += 2;
5149 /* We put the short constants before the long constants, so
5150 we must count the length of short constants in the range
5151 for the long constants. */
5152 /* ??? This isn't optimal, but is easy to do. */
5153 si_limit -= 2;
5155 else
5157 /* We dump DF/DI constants before SF/SI ones, because
5158 the limit is the same, but the alignment requirements
5159 are higher. We may waste up to 4 additional bytes
5160 for alignment, and the DF/DI constant may have
5161 another SF/SI constant placed before it. */
5162 if (TARGET_SHCOMPACT
5163 && ! found_di
5164 && (mode == DFmode || mode == DImode))
5166 found_di = 1;
5167 si_limit -= 8;
5169 while (si_align > 2 && found_si + si_align - 2 > count_si)
5170 si_align >>= 1;
5171 if (found_si > count_si)
5172 count_si = found_si;
5173 found_si += GET_MODE_SIZE (mode);
5174 if (num_mova)
5175 si_limit -= GET_MODE_SIZE (mode);
5179 if (mova_p (from))
5181 switch (untangle_mova (&num_mova, &mova, from))
5183 case 1:
5184 if (flag_pic)
5186 rtx src = SET_SRC (PATTERN (from));
5187 if (GET_CODE (src) == CONST
5188 && GET_CODE (XEXP (src, 0)) == UNSPEC
5189 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5190 last_symoff = from;
5192 break;
5193 case 0: return find_barrier (0, 0, mova);
5194 case 2:
5196 leading_mova = 0;
5197 barrier_before_mova
5198 = good_barrier ? good_barrier : found_barrier;
5200 default: break;
5202 if (found_si > count_si)
5203 count_si = found_si;
5205 else if (JUMP_TABLE_DATA_P (from)
5206 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5208 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5209 || (num_mova
5210 && (prev_nonnote_insn (from)
5211 == XEXP (MOVA_LABELREF (mova), 0))))
5212 num_mova--;
5213 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5215 /* We have just passed the barrier in front of the
5216 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5217 the ADDR_DIFF_VEC is accessed as data, just like our pool
5218 constants, this is a good opportunity to accommodate what
5219 we have gathered so far.
5220 If we waited any longer, we could end up at a barrier in
5221 front of code, which gives worse cache usage for separated
5222 instruction / data caches. */
5223 good_barrier = found_barrier;
5224 break;
5226 else
5228 rtx body = PATTERN (from);
5229 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5232 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5233 else if (JUMP_P (from)
5234 && ! TARGET_SH2
5235 && ! optimize_size)
5236 new_align = 4;
5238 /* There is a possibility that a bf is transformed into a bf/s by the
5239 delay slot scheduler. */
5240 if (JUMP_P (from)
5241 && get_attr_type (from) == TYPE_CBRANCH
5242 && ! sequence_insn_p (from))
5243 inc += 2;
5245 if (found_si)
5247 count_si += inc;
5248 if (new_align > si_align)
5250 si_limit -= (count_si - 1) & (new_align - si_align);
5251 si_align = new_align;
5253 count_si = (count_si + new_align - 1) & -new_align;
5255 if (found_hi)
5257 count_hi += inc;
5258 if (new_align > hi_align)
5260 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5261 hi_align = new_align;
5263 count_hi = (count_hi + new_align - 1) & -new_align;
5265 from = NEXT_INSN (from);
5268 if (num_mova)
5270 if (leading_mova)
5272 /* Try as we might, the leading mova is out of range. Change
5273 it into a load (which will become a pcload) and retry. */
5274 fixup_mova (mova);
5275 return find_barrier (0, 0, mova);
5277 else
5279 /* Insert the constant pool table before the mova instruction,
5280 to prevent the mova label reference from going out of range. */
5281 from = mova;
5282 good_barrier = found_barrier = barrier_before_mova;
5286 if (found_barrier)
5288 if (good_barrier && next_real_insn (found_barrier))
5289 found_barrier = good_barrier;
5291 else
5293 /* We didn't find a barrier in time to dump our stuff,
5294 so we'll make one. */
5295 rtx label = gen_label_rtx ();
5297 /* Don't emit a constant table in the middle of insns for
5298 casesi_worker_2. This is a bit of overkill, but it is enough
5299 because casesi_worker_2 does not appear very frequently. */
5300 if (last_symoff)
5301 from = last_symoff;
5303 /* If we exceeded the range, then we must back up over the last
5304 instruction we looked at. Otherwise, we just need to undo the
5305 NEXT_INSN at the end of the loop. */
5306 if (PREV_INSN (from) != orig
5307 && (count_hi > hi_limit || count_si > si_limit))
5308 from = PREV_INSN (PREV_INSN (from));
5309 else
5310 from = PREV_INSN (from);
5312 /* Don't emit a constant table in the middle of global pointer setting,
5313 since that would move the addressing base GOT into another table.
5314 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5315 in the pool anyway, so just move up the whole constant pool.
5317 However, avoid doing so when the last single GOT mov is the starting
5318 insn itself. Going above the start insn would create a negative
5319 offset, causing errors. */
5320 if (last_got && last_got != orig)
5321 from = PREV_INSN (last_got);
5323 /* Don't insert the constant pool table at a position which
5324 may be a landing pad. */
5325 if (flag_exceptions
5326 && CALL_P (from)
5327 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5328 from = PREV_INSN (from);
5330 /* Walk back to be just before any jump or label.
5331 Putting it before a label reduces the number of times the branch
5332 around the constant pool table will be hit. Putting it before
5333 a jump makes it more likely that the bra delay slot will be
5334 filled. */
5335 while (NOTE_P (from) || JUMP_P (from)
5336 || LABEL_P (from))
5337 from = PREV_INSN (from);
5339 /* Make sure we do not split between a call and its corresponding
5340 CALL_ARG_LOCATION note. */
5341 if (CALL_P (from))
5343 rtx next = NEXT_INSN (from);
5344 if (next && NOTE_P (next)
5345 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5346 from = next;
5349 from = emit_jump_insn_after (gen_jump (label), from);
5350 JUMP_LABEL (from) = label;
5351 LABEL_NUSES (label) = 1;
5352 found_barrier = emit_barrier_after (from);
5353 emit_label_after (label, found_barrier);
5356 return found_barrier;
5359 /* If the instruction INSN is implemented by a special function, and we can
5360 positively find the register that is used to call the sfunc, and this
5361 register is not used anywhere else in this instruction - except as the
5362 destination of a set, return this register; else, return 0. */
5364 sfunc_uses_reg (rtx insn)
5366 int i;
5367 rtx pattern, part, reg_part, reg;
5369 if (!NONJUMP_INSN_P (insn))
5370 return NULL_RTX;
5371 pattern = PATTERN (insn);
5372 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5373 return NULL_RTX;
5375 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5377 part = XVECEXP (pattern, 0, i);
5378 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5379 reg_part = part;
5381 if (! reg_part)
5382 return NULL_RTX;
5383 reg = XEXP (reg_part, 0);
5384 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5386 part = XVECEXP (pattern, 0, i);
5387 if (part == reg_part || GET_CODE (part) == CLOBBER)
5388 continue;
5389 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5390 && REG_P (SET_DEST (part)))
5391 ? SET_SRC (part) : part)))
5392 return NULL_RTX;
5394 return reg;
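/* Illustrative example (the exact pattern shape here is hypothetical): for
   an sfunc call represented as
     (parallel [(set ...) ... (use (reg:SI 2)) (clobber (reg:SI PR_REG))])
   the (use (reg:SI 2)) element identifies r2 as the register holding the
   sfunc address, and r2 is returned provided it is not mentioned anywhere
   else in the pattern except as the destination of a SET.  */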
5397 /* See if the only way in which INSN uses REG is by calling it, or by
5398 setting it while calling it. Set *SET to a SET rtx if the register
5399 is set by INSN. */
5400 static bool
5401 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5403 rtx pattern, reg2;
5405 *set = NULL_RTX;
5407 reg2 = sfunc_uses_reg (insn);
5408 if (reg2 && REGNO (reg2) == REGNO (reg))
5410 pattern = single_set (insn);
5411 if (pattern
5412 && REG_P (SET_DEST (pattern))
5413 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5414 *set = pattern;
5415 return false;
5417 if (!CALL_P (insn))
5419 /* We don't use rtx_equal_p because we don't care if the mode is
5420 different. */
5421 pattern = single_set (insn);
5422 if (pattern
5423 && REG_P (SET_DEST (pattern))
5424 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5426 rtx par, part;
5427 int i;
5429 *set = pattern;
5430 par = PATTERN (insn);
5431 if (GET_CODE (par) == PARALLEL)
5432 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5434 part = XVECEXP (par, 0, i);
5435 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5436 return true;
5438 return reg_mentioned_p (reg, SET_SRC (pattern));
5441 return true;
5444 pattern = PATTERN (insn);
5446 if (GET_CODE (pattern) == PARALLEL)
5448 int i;
5450 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5451 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5452 return true;
5453 pattern = XVECEXP (pattern, 0, 0);
5456 if (GET_CODE (pattern) == SET)
5458 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5460 /* We don't use rtx_equal_p, because we don't care if the
5461 mode is different. */
5462 if (!REG_P (SET_DEST (pattern))
5463 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5464 return true;
5466 *set = pattern;
5469 pattern = SET_SRC (pattern);
5472 if (GET_CODE (pattern) != CALL
5473 || !MEM_P (XEXP (pattern, 0))
5474 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5475 return true;
5477 return false;
5480 /* Given X, a pattern of an insn or a part of it, return a mask of used
5481 general registers. Bits 0..15 mean that the respective registers
5482 are used as inputs in the instruction. Bits 16..31 mean that the
5483 registers 0..15, respectively, are used as outputs, or are clobbered.
5484 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5486 regs_used (rtx x, int is_dest)
5488 enum rtx_code code;
5489 const char *fmt;
5490 int i, used = 0;
5492 if (! x)
5493 return used;
5494 code = GET_CODE (x);
5495 switch (code)
5497 case REG:
5498 if (REGNO (x) < 16)
5499 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5500 << (REGNO (x) + is_dest));
5501 return 0;
5502 case SUBREG:
5504 rtx y = SUBREG_REG (x);
5506 if (!REG_P (y))
5507 break;
5508 if (REGNO (y) < 16)
5509 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5510 << (REGNO (y) +
5511 subreg_regno_offset (REGNO (y),
5512 GET_MODE (y),
5513 SUBREG_BYTE (x),
5514 GET_MODE (x)) + is_dest));
5515 return 0;
5517 case SET:
5518 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5519 case RETURN:
5520 /* If there was a return value, it must have been indicated with USE. */
5521 return 0x00ffff00;
5522 case CLOBBER:
5523 is_dest = 1;
5524 break;
5525 case MEM:
5526 is_dest = 0;
5527 break;
5528 case CALL:
5529 used |= 0x00ff00f0;
5530 break;
5531 default:
5532 break;
5535 fmt = GET_RTX_FORMAT (code);
5537 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5539 if (fmt[i] == 'E')
5541 int j;
5542 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5543 used |= regs_used (XVECEXP (x, i, j), is_dest);
5545 else if (fmt[i] == 'e')
5546 used |= regs_used (XEXP (x, i), is_dest);
5548 return used;
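/* Example of the encoding described above (illustrative only): for
   (set (reg:SI 3) (plus:SI (reg:SI 1) (reg:SI 2))) the result is
   (1 << 1) | (1 << 2) for the two inputs plus (1 << (16 + 3)) for the
   output, i.e. 0x80006.  */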
5551 /* Create an instruction that prevents redirection of a conditional branch
5552 to the destination of the JUMP with address ADDR.
5553 If the branch needs to be implemented as an indirect jump, try to find
5554 a scratch register for it.
5555 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5556 If any preceding insn that doesn't fit into a delay slot is good enough,
5557 pass 1. Pass 2 if a definite blocking insn is needed.
5558 -1 is used internally to avoid deep recursion.
5559 If a blocking instruction is made or recognized, return it. */
5560 static rtx
5561 gen_block_redirect (rtx jump, int addr, int need_block)
5563 int dead = 0;
5564 rtx prev = prev_nonnote_insn (jump);
5565 rtx dest;
5567 /* First, check if we already have an instruction that satisfies our need. */
5568 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5570 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5571 return prev;
5572 if (GET_CODE (PATTERN (prev)) == USE
5573 || GET_CODE (PATTERN (prev)) == CLOBBER
5574 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5575 prev = jump;
5576 else if ((need_block &= ~1) < 0)
5577 return prev;
5578 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5579 need_block = 0;
5581 if (GET_CODE (PATTERN (jump)) == RETURN)
5583 if (! need_block)
5584 return prev;
5585 /* Reorg even does nasty things with return insns that cause branches
5586 to go out of range - see find_end_label and callers. */
5587 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5589 /* We can't use JUMP_LABEL here because it might be undefined
5590 when not optimizing. */
5591 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5592 /* If the branch is out of range, try to find a scratch register for it. */
5593 if (optimize
5594 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5595 > 4092 + 4098))
5597 rtx scan;
5598 /* Don't look for the stack pointer as a scratch register;
5599 it would cause trouble if an interrupt occurred. */
5600 unsigned attempt = 0x7fff, used;
5601 int jump_left = flag_expensive_optimizations + 1;
5603 /* It is likely that the most recent eligible instruction is wanted for
5604 the delay slot. Therefore, find out which registers it uses, and
5605 try to avoid using them. */
5607 for (scan = jump; (scan = PREV_INSN (scan)); )
5609 enum rtx_code code;
5611 if (INSN_DELETED_P (scan))
5612 continue;
5613 code = GET_CODE (scan);
5614 if (code == CODE_LABEL || code == JUMP_INSN)
5615 break;
5616 if (code == INSN
5617 && GET_CODE (PATTERN (scan)) != USE
5618 && GET_CODE (PATTERN (scan)) != CLOBBER
5619 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5621 attempt &= ~regs_used (PATTERN (scan), 0);
5622 break;
5625 for (used = dead = 0, scan = JUMP_LABEL (jump);
5626 (scan = NEXT_INSN (scan)); )
5628 enum rtx_code code;
5630 if (INSN_DELETED_P (scan))
5631 continue;
5632 code = GET_CODE (scan);
5633 if (INSN_P (scan))
5635 used |= regs_used (PATTERN (scan), 0);
5636 if (code == CALL_INSN)
5637 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5638 dead |= (used >> 16) & ~used;
5639 if (dead & attempt)
5641 dead &= attempt;
5642 break;
5644 if (code == JUMP_INSN)
5646 if (jump_left-- && simplejump_p (scan))
5647 scan = JUMP_LABEL (scan);
5648 else
5649 break;
5653 /* Mask out the stack pointer again, in case it was
5654 the only 'free' register we have found. */
5655 dead &= 0x7fff;
5657 /* If the immediate destination is still in range, check for possible
5658 threading with a jump beyond the delay slot insn.
5659 Don't check if we are called recursively; the jump has been or will be
5660 checked in a separate invocation. */
5662 else if (optimize && need_block >= 0)
5664 rtx next = next_active_insn (next_active_insn (dest));
5665 if (next && JUMP_P (next)
5666 && GET_CODE (PATTERN (next)) == SET
5667 && recog_memoized (next) == CODE_FOR_jump_compact)
5669 dest = JUMP_LABEL (next);
5670 if (dest
5671 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5672 > 4092 + 4098))
5673 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5677 if (dead)
5679 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5681 /* It would be nice if we could convert the jump into an indirect
5682 jump / far branch right now, thus exposing all constituent
5683 instructions to further optimization. However, reorg uses
5684 simplejump_p to determine if there is an unconditional jump where
5685 it should try to schedule instructions from the target of the
5686 branch; simplejump_p fails for indirect jumps even if they have
5687 a JUMP_LABEL. */
5688 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5689 (reg, GEN_INT (unspec_bbr_uid++)),
5690 jump);
5691 /* ??? We would like this to have the scope of the jump, but that
5692 scope will change when a delay slot insn of an inner scope is added.
5693 Hence, after delay slot scheduling, we'll have to expect
5694 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5695 the jump. */
5697 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5698 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5699 return insn;
5701 else if (need_block)
5702 /* We can't use JUMP_LABEL here because it might be undefined
5703 when not optimizing. */
5704 return emit_insn_before (gen_block_branch_redirect
5705 (GEN_INT (unspec_bbr_uid++)),
5706 jump);
5707 return prev;
5710 #define CONDJUMP_MIN -252
5711 #define CONDJUMP_MAX 262
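/* Reach of an SH conditional branch relative to its own address, used
   below when deciding where the near label of a far branch can live.
   The values presumably reflect the bt/bf displacement range (an 8-bit
   displacement scaled by 2, counted from the branch address + 4), with
   a little slack.  */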
5712 struct far_branch
5714 /* A label (to be placed) in front of the jump
5715 that jumps to our ultimate destination. */
5716 rtx near_label;
5717 /* Where we are going to insert it if we cannot move the jump any farther,
5718 or the jump itself if we have picked up an existing jump. */
5719 rtx insert_place;
5720 /* The ultimate destination. */
5721 rtx far_label;
5722 struct far_branch *prev;
5723 /* If the branch has already been created, its address;
5724 else the address of its first prospective user. */
5725 int address;
5728 static void gen_far_branch (struct far_branch *);
5729 enum mdep_reorg_phase_e mdep_reorg_phase;
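/* Turn the out-of-range conditional branch at BP->insert_place into a
   short branch around an unconditional jump (or return) to the real
   target.  Roughly, with illustrative label names:

       bt   .L_far        becomes      bf   .L_skip   ! inverted condition
                                    .L_near:          ! bp->near_label
                                       bra  .L_far    ! rts if no far_label
                                       nop
                                    .L_skip:

   Other out-of-range conditional branches to the same destination can
   later be redirected to .L_near instead.  */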
5730 static void
5731 gen_far_branch (struct far_branch *bp)
5733 rtx insn = bp->insert_place;
5734 rtx jump;
5735 rtx label = gen_label_rtx ();
5736 int ok;
5738 emit_label_after (label, insn);
5739 if (bp->far_label)
5741 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5742 LABEL_NUSES (bp->far_label)++;
5744 else
5745 jump = emit_jump_insn_after (gen_return (), insn);
5747 /* Emit a barrier so that reorg knows that any following instructions
5748 are not reachable via a fall-through path.
5749 But don't do this when not optimizing, since we wouldn't suppress the
5750 alignment for the barrier then, and could end up with out-of-range
5751 pc-relative loads. */
5752 if (optimize)
5753 emit_barrier_after (jump);
5754 emit_label_after (bp->near_label, insn);
5756 if (bp->far_label)
5757 JUMP_LABEL (jump) = bp->far_label;
5758 else
5760 rtx pat = PATTERN (jump);
5761 gcc_assert (ANY_RETURN_P (pat));
5762 JUMP_LABEL (jump) = pat;
5765 ok = invert_jump (insn, label, 1);
5766 gcc_assert (ok);
5768 /* If we are branching around a jump (rather than a return), prevent
5769 reorg from using an insn from the jump target as the delay slot insn -
5770 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5771 and it could cause branches to go out of range. */
5772 if (bp->far_label)
5773 (emit_insn_after
5774 (gen_stuff_delay_slot
5775 (GEN_INT (unspec_bbr_uid++),
5776 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5777 insn));
5778 /* Prevent reorg from undoing our splits. */
5779 gen_block_redirect (jump, bp->address += 2, 2);
5782 /* Fix up ADDR_DIFF_VECs. */
5783 void
5784 fixup_addr_diff_vecs (rtx first)
5786 rtx insn;
5788 for (insn = first; insn; insn = NEXT_INSN (insn))
5790 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5792 if (! JUMP_TABLE_DATA_P (insn)
5793 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5794 continue;
5795 pat = PATTERN (insn);
5796 vec_lab = XEXP (XEXP (pat, 0), 0);
5798 /* Search the matching casesi_jump_2. */
5799 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5801 if (!JUMP_P (prev))
5802 continue;
5803 prevpat = PATTERN (prev);
5804 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5805 continue;
5806 x = XVECEXP (prevpat, 0, 1);
5807 if (GET_CODE (x) != USE)
5808 continue;
5809 x = XEXP (x, 0);
5810 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5811 break;
5813 /* FIXME: This is a bug in the optimizer, but it seems harmless
5814 to just avoid panicking. */
5815 if (!prev)
5816 continue;
5818 /* Emit the reference label of the braf where it belongs, right after
5819 the casesi_jump_2 (i.e. braf). */
5820 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5821 emit_label_after (braf_label, prev);
5823 /* Fix up the ADDR_DIFF_VEC to be relative
5824 to the reference address of the braf. */
5825 XEXP (XEXP (pat, 0), 0) = braf_label;
5829 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5830 a barrier. Return the base 2 logarithm of the desired alignment. */
5831 int
5832 barrier_align (rtx barrier_or_label)
5834 rtx next, pat;
5836 if (! barrier_or_label)
5837 return 0;
5839 if (LABEL_P (barrier_or_label)
5840 && NEXT_INSN (barrier_or_label)
5841 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5842 return 2;
5844 if (BARRIER_P (barrier_or_label)
5845 && PREV_INSN (barrier_or_label)
5846 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5848 pat = PATTERN (PREV_INSN (barrier_or_label));
5849 /* If this is a very small table, we want to keep the alignment after
5850 the table to the minimum for proper code alignment. */
5851 return ((optimize_size
5852 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5853 <= (unsigned) 1 << (CACHE_LOG - 2)))
5854 ? 1 << TARGET_SHMEDIA : align_jumps_log);
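/* In the test above, XVECLEN (pat, 1) is the number of table entries
   and GET_MODE_SIZE (GET_MODE (pat)) the size of one entry, so the
   table counts as very small when it occupies at most a quarter of a
   cache line; e.g. with CACHE_LOG == 5 (32-byte lines) that is a
   table of no more than 8 bytes.  */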
5857 next = next_active_insn (barrier_or_label);
5859 if (! next)
5860 return 0;
5862 pat = PATTERN (next);
5864 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5865 /* This is a barrier in front of a constant table. */
5866 return 0;
5868 if (optimize_size)
5869 return 0;
5871 if (! TARGET_SH2 || ! optimize)
5872 return align_jumps_log;
5874 /* When fixing up pcloads, a constant table might be inserted just before
5875 the basic block that ends with the barrier. Thus, we can't trust the
5876 instruction lengths before that. */
5877 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5879 /* Check if there is an immediately preceding branch to the insn beyond
5880 the barrier. We must weigh the cost of discarding useful information
5881 from the current cache line when executing this branch and there is
5882 an alignment, against that of fetching unneeded insns in front of the
5883 branch target when there is no alignment. */
5885 /* There are two delay_slot cases to consider. One is the simple case
5886 where the preceding branch is to the insn beyond the barrier (simple
5887 delay slot filling), and the other is where the preceding branch has
5888 a delay slot that is a duplicate of the insn after the barrier
5889 (fill_eager_delay_slots) and the branch is to the insn after the insn
5890 after the barrier. */
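/* CREDIT below starts out as a quarter of a cache line plus two bytes;
   the lengths of the preceding insns are charged against it, and the
   alignment is omitted (returning 0) only when a preceding branch
   within that budget targets the insn just past the barrier.  */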
5892 int slot, credit;
5893 bool jump_to_next = false;
5895 /* Skip to the insn before the JUMP_INSN before the barrier under
5896 investigation. */
5897 rtx prev = prev_real_insn (prev_active_insn (barrier_or_label));
5899 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5900 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5901 prev = prev_real_insn (prev))
5903 jump_to_next = false;
5904 if (GET_CODE (PATTERN (prev)) == USE
5905 || GET_CODE (PATTERN (prev)) == CLOBBER)
5906 continue;
5907 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5909 prev = XVECEXP (PATTERN (prev), 0, 1);
5910 if (INSN_UID (prev) == INSN_UID (next))
5912 /* Delay slot was filled with insn at jump target. */
5913 jump_to_next = true;
5914 continue;
5918 if (slot &&
5919 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5920 slot = 0;
5921 credit -= get_attr_length (prev);
5923 if (prev && jump_to_label_p (prev))
5925 rtx x;
5926 if (jump_to_next
5927 || next_real_insn (JUMP_LABEL (prev)) == next
5928 /* If relax_delay_slots() decides NEXT was redundant
5929 with some previous instruction, it will have
5930 redirected PREV's jump to the following insn. */
5931 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5932 /* There is no upper bound on redundant instructions
5933 that might have been skipped, but we must not put an
5934 alignment where none had been before. */
5935 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5936 (INSN_P (x)
5937 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5938 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5939 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5941 rtx pat = PATTERN (prev);
5942 if (GET_CODE (pat) == PARALLEL)
5943 pat = XVECEXP (pat, 0, 0);
5944 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5945 return 0;
5950 return align_jumps_log;
5953 /* If we are inside a phony loop, almost any kind of label can turn up as the
5954 first one in the loop. Aligning a braf label causes incorrect switch
5955 destination addresses; we can detect braf labels because they are
5956 followed by a BARRIER.
5957 Applying loop alignment to small constant or switch tables is a waste
5958 of space, so we suppress this too. */
5959 int
5960 sh_loop_align (rtx label)
5962 rtx next = label;
5964 if (! optimize || optimize_size)
5965 return 0;
5967 do
5968 next = next_nonnote_insn (next);
5969 while (next && LABEL_P (next));
5971 if (! next
5972 || ! INSN_P (next)
5973 || recog_memoized (next) == CODE_FOR_consttable_2)
5974 return 0;
5976 return align_loops_log;
5979 /* Do a final pass over the function, just before delayed branch
5980 scheduling. */
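/* Roughly, this proceeds as follows: under -mrelax, tag each function
   call and the insn that loads its target address with matching
   REG_LABEL_OPERAND notes, so that final can emit the .uses pseudo-ops;
   fix up ADDR_DIFF_VECs for the SH2 braf-based dispatch; find
   out-of-range pc-relative moves and dump constant pool tables behind a
   suitable barrier; then split out-of-range branches and, when delayed
   branching is enabled, emit dummy uses in front of each sfunc.  */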
5981 static void
5982 sh_reorg (void)
5984 rtx first, insn, mova = NULL_RTX;
5985 int num_mova;
5986 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5987 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5989 first = get_insns ();
5990 max_labelno_before_reorg = max_label_num ();
5992 /* We must split call insns before introducing `mova's. If we're
5993 optimizing, they'll have already been split. Otherwise, make
5994 sure we don't split them too late. */
5995 if (! optimize)
5996 split_all_insns_noflow ();
5998 if (TARGET_SHMEDIA)
5999 return;
6001 /* If relaxing, generate pseudo-ops to associate function calls with
6002 the symbols they call. It does no harm to not generate these
6003 pseudo-ops. However, when we can generate them, it enables the
6004 linker to potentially relax the jsr to a bsr, and eliminate the
6005 register load and, possibly, the constant pool entry. */
6007 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6008 if (TARGET_RELAX)
6010 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6011 own purposes. This works because none of the remaining passes
6012 need to look at them.
6014 ??? But it may break in the future. We should use a machine
6015 dependent REG_NOTE, or some other approach entirely. */
6016 for (insn = first; insn; insn = NEXT_INSN (insn))
6018 if (INSN_P (insn))
6020 rtx note;
6022 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6023 NULL_RTX)) != 0)
6024 remove_note (insn, note);
6028 for (insn = first; insn; insn = NEXT_INSN (insn))
6030 rtx pattern, reg, link, set, scan, dies, label;
6031 int rescan = 0, foundinsn = 0;
6033 if (CALL_P (insn))
6035 pattern = PATTERN (insn);
6037 if (GET_CODE (pattern) == PARALLEL)
6038 pattern = XVECEXP (pattern, 0, 0);
6039 if (GET_CODE (pattern) == SET)
6040 pattern = SET_SRC (pattern);
6042 if (GET_CODE (pattern) != CALL
6043 || !MEM_P (XEXP (pattern, 0)))
6044 continue;
6046 reg = XEXP (XEXP (pattern, 0), 0);
6048 else
6050 reg = sfunc_uses_reg (insn);
6051 if (! reg)
6052 continue;
6055 if (!REG_P (reg))
6056 continue;
6058 /* Try scanning backward to find where the register is set. */
6059 link = NULL;
6060 for (scan = PREV_INSN (insn);
6061 scan && !LABEL_P (scan);
6062 scan = PREV_INSN (scan))
6064 if (! INSN_P (scan))
6065 continue;
6067 if (! reg_mentioned_p (reg, scan))
6068 continue;
6070 if (noncall_uses_reg (reg, scan, &set))
6071 break;
6073 if (set)
6075 link = scan;
6076 break;
6080 if (! link)
6081 continue;
6083 /* The register is set at LINK. */
6085 /* We can only optimize the function call if the register is
6086 being set to a symbol. In theory, we could sometimes
6087 optimize calls to a constant location, but the assembler
6088 and linker do not support that at present. */
6089 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6090 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6091 continue;
6093 /* Scan forward from LINK to the place where REG dies, and
6094 make sure that the only insns which use REG are
6095 themselves function calls. */
6097 /* ??? This doesn't work for call targets that were allocated
6098 by reload, since there may not be a REG_DEAD note for the
6099 register. */
6101 dies = NULL_RTX;
6102 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6104 rtx scanset;
6106 /* Don't try to trace forward past a CODE_LABEL if we haven't
6107 seen INSN yet. Ordinarily, we will only find the setting insn
6108 if it is in the same basic block. However,
6109 cross-jumping can insert code labels in between the load and
6110 the call, and can result in situations where a single call
6111 insn may have two targets depending on where we came from. */
6113 if (LABEL_P (scan) && ! foundinsn)
6114 break;
6116 if (! INSN_P (scan))
6117 continue;
6119 /* Don't try to trace forward past a JUMP. To optimize
6120 safely, we would have to check that all the
6121 instructions at the jump destination did not use REG. */
6123 if (JUMP_P (scan))
6124 break;
6126 if (! reg_mentioned_p (reg, scan))
6127 continue;
6129 if (noncall_uses_reg (reg, scan, &scanset))
6130 break;
6132 if (scan == insn)
6133 foundinsn = 1;
6135 if (scan != insn
6136 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6138 /* There is a function call to this register other
6139 than the one we are checking. If we optimize
6140 this call, we need to rescan again below. */
6141 rescan = 1;
6144 /* ??? We shouldn't have to worry about SCANSET here.
6145 We should just be able to check for a REG_DEAD note
6146 on a function call. However, the REG_DEAD notes are
6147 apparently not dependable around libcalls; c-torture
6148 execute/920501-2 is a test case. If SCANSET is set,
6149 then this insn sets the register, so it must have
6150 died earlier. Unfortunately, this will only handle
6151 the cases in which the register is, in fact, set in a
6152 later insn. */
6154 /* ??? We shouldn't have to use FOUNDINSN here.
6155 This dates back to when we used LOG_LINKS to find
6156 the most recent insn which sets the register. */
6158 if (foundinsn
6159 && (scanset
6160 || find_reg_note (scan, REG_DEAD, reg)))
6162 dies = scan;
6163 break;
6167 if (! dies)
6169 /* Either there was a branch, or some insn used REG
6170 other than as a function call address. */
6171 continue;
6174 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6175 on the insn which sets the register, and on each call insn
6176 which uses the register. In final_prescan_insn we look for
6177 the REG_LABEL_OPERAND notes, and output the appropriate label
6178 or pseudo-op. */
6180 label = gen_label_rtx ();
6181 add_reg_note (link, REG_LABEL_OPERAND, label);
6182 add_reg_note (insn, REG_LABEL_OPERAND, label);
6183 if (rescan)
6185 scan = link;
6188 rtx reg2;
6190 scan = NEXT_INSN (scan);
6191 if (scan != insn
6192 && ((CALL_P (scan)
6193 && reg_mentioned_p (reg, scan))
6194 || ((reg2 = sfunc_uses_reg (scan))
6195 && REGNO (reg2) == REGNO (reg))))
6196 add_reg_note (scan, REG_LABEL_OPERAND, label);
6198 while (scan != dies);
6203 if (TARGET_SH2)
6204 fixup_addr_diff_vecs (first);
6206 if (optimize)
6208 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6209 shorten_branches (first);
6212 /* Scan the function looking for move instructions which have to be
6213 changed to pc-relative loads and insert the literal tables. */
6214 label_ref_list_pool = create_alloc_pool ("label references list",
6215 sizeof (struct label_ref_list_d),
6216 30);
6217 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6218 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6220 if (mova_p (insn))
6222 /* ??? basic block reordering can move a switch table dispatch
6223 below the switch table. Check if that has happened.
6224 We only have the addresses available when optimizing; but then,
6225 this check shouldn't be needed when not optimizing. */
6226 if (!untangle_mova (&num_mova, &mova, insn))
6228 insn = mova;
6229 num_mova = 0;
6232 else if (JUMP_TABLE_DATA_P (insn)
6233 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6234 && num_mova
6235 /* ??? loop invariant motion can also move a mova out of a
6236 loop. Since loop does this code motion anyway, maybe we
6237 should wrap UNSPEC_MOVA into a CONST, so that reload can
6238 move it back. */
6239 && ((num_mova > 1
6240 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6241 || (prev_nonnote_insn (insn)
6242 == XEXP (MOVA_LABELREF (mova), 0))))
6244 rtx scan;
6245 int total;
6247 num_mova--;
6249 /* Some code might have been inserted between the mova and
6250 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6251 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6252 total += get_attr_length (scan);
6254 /* The range of mova is 1020; add 4 because the pc counts from the
6255 address of the second instruction after this one, and subtract 2
6256 in case the pc is 2-byte aligned. Any alignment needed for the
6257 ADDR_DIFF_VEC cancels out with that of the mova itself. */
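/* That is, the limit checked below is 1020 + 4 - 2 == 1022 bytes
   between the mova and its ADDR_DIFF_VEC.  */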
6258 if (total > 1022)
6260 /* Change the mova into a load, and restart scanning
6261 there. broken_move will then return true for mova. */
6262 fixup_mova (mova);
6263 insn = mova;
6266 if (broken_move (insn)
6267 || (NONJUMP_INSN_P (insn)
6268 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6270 rtx scan;
6271 /* Scan ahead looking for a barrier to stick the constant table
6272 behind. */
6273 rtx barrier = find_barrier (num_mova, mova, insn);
6274 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6275 int need_aligned_label = 0;
6277 if (num_mova && ! mova_p (mova))
6279 /* find_barrier had to change the first mova into a
6280 pcload; thus, we have to start with this new pcload. */
6281 insn = mova;
6282 num_mova = 0;
6284 /* Now find all the moves between the points and modify them. */
6285 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6287 if (LABEL_P (scan))
6288 last_float = 0;
6289 if (NONJUMP_INSN_P (scan)
6290 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6291 need_aligned_label = 1;
6292 if (broken_move (scan))
6294 rtx *patp = &PATTERN (scan), pat = *patp;
6295 rtx src, dst;
6296 rtx lab;
6297 rtx newsrc;
6298 enum machine_mode mode;
6300 if (GET_CODE (pat) == PARALLEL)
6301 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6302 src = SET_SRC (pat);
6303 dst = SET_DEST (pat);
6304 mode = GET_MODE (dst);
6306 if (mode == SImode && satisfies_constraint_I16 (src)
6307 && REGNO (dst) != FPUL_REG)
6309 int offset = 0;
6311 mode = HImode;
6312 while (GET_CODE (dst) == SUBREG)
6314 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6315 GET_MODE (SUBREG_REG (dst)),
6316 SUBREG_BYTE (dst),
6317 GET_MODE (dst));
6318 dst = SUBREG_REG (dst);
6320 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6322 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6324 /* This must be an insn that clobbers r0. */
6325 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6326 XVECLEN (PATTERN (scan), 0)
6327 - 1);
6328 rtx clobber = *clobberp;
6330 gcc_assert (GET_CODE (clobber) == CLOBBER
6331 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6333 if (last_float
6334 && reg_set_between_p (r0_rtx, last_float_move, scan))
6335 last_float = 0;
6336 if (last_float
6337 && TARGET_SHCOMPACT
6338 && GET_MODE_SIZE (mode) != 4
6339 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6340 last_float = 0;
6341 lab = add_constant (src, mode, last_float);
6342 if (lab)
6343 emit_insn_before (gen_mova (lab), scan);
6344 else
6346 /* There will be a REG_UNUSED note for r0 on
6347 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6348 otherwise reorg:mark_target_live_regs will not
6349 consider r0 to be used and we could end up with a
6350 delay slot insn in front of SCAN that clobbers r0. */
6351 rtx note
6352 = find_regno_note (last_float_move, REG_UNUSED, 0);
6354 /* If we are not optimizing, then there may not be
6355 a note. */
6356 if (note)
6357 PUT_REG_NOTE_KIND (note, REG_INC);
6359 *last_float_addr = r0_inc_rtx;
6361 last_float_move = scan;
6362 last_float = src;
6363 newsrc = gen_const_mem (mode,
6364 (((TARGET_SH4 && ! TARGET_FMOVD)
6365 || REGNO (dst) == FPUL_REG)
6366 ? r0_inc_rtx
6367 : r0_rtx));
6368 last_float_addr = &XEXP (newsrc, 0);
6370 /* Remove the clobber of r0. */
6371 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6372 gen_rtx_SCRATCH (Pmode));
6374 /* This is a mova needing a label. Create it. */
6375 else if (GET_CODE (src) == UNSPEC
6376 && XINT (src, 1) == UNSPEC_MOVA
6377 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6379 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6380 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6381 newsrc = gen_rtx_UNSPEC (SImode,
6382 gen_rtvec (1, newsrc),
6383 UNSPEC_MOVA);
6385 else if (GET_CODE (src) == UNSPEC_VOLATILE
6386 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6388 newsrc = XVECEXP (src, 0, 0);
6389 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6390 INSN_CODE (scan) = -1;
6391 continue;
6393 else
6395 lab = add_constant (src, mode, 0);
6396 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6397 newsrc = gen_const_mem (mode, newsrc);
6399 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6400 INSN_CODE (scan) = -1;
6403 dump_table (need_aligned_label ? insn : 0, barrier);
6404 insn = barrier;
6407 free_alloc_pool (label_ref_list_pool);
6408 for (insn = first; insn; insn = NEXT_INSN (insn))
6409 PUT_MODE (insn, VOIDmode);
6411 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6412 INSN_ADDRESSES_FREE ();
6413 split_branches (first);
6415 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6416 also has an effect on the register that holds the address of the sfunc.
6417 Insert an extra dummy insn in front of each sfunc that pretends to
6418 use this register. */
6419 if (flag_delayed_branch)
6421 for (insn = first; insn; insn = NEXT_INSN (insn))
6423 rtx reg = sfunc_uses_reg (insn);
6425 if (! reg)
6426 continue;
6427 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6430 #if 0
6431 /* fpscr is not actually a user variable, but we pretend it is for the
6432 sake of the previous optimization passes, since we want it handled like
6433 one. However, we don't have any debugging information for it, so turn
6434 it into a non-user variable now. */
6435 if (TARGET_SH4)
6436 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6437 #endif
6438 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6441 /* Return the UID of the insn that follows the specified label. */
6442 static int
6443 get_dest_uid (rtx label, int max_uid)
6445 rtx dest = next_real_insn (label);
6446 int dest_uid;
6447 if (! dest)
6448 /* This can happen for an undefined label. */
6449 return 0;
6450 dest_uid = INSN_UID (dest);
6451 /* If this is a newly created branch redirection blocking instruction,
6452 we cannot index the branch_uid or insn_addresses arrays with its
6453 uid. But then, we won't need to, because the actual destination is
6454 the following branch. */
6455 while (dest_uid >= max_uid)
6457 dest = NEXT_INSN (dest);
6458 dest_uid = INSN_UID (dest);
6460 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6461 return 0;
6462 return dest_uid;
6465 /* Split condbranches that are out of range. Also add clobbers for
6466 scratch registers that are needed in far jumps.
6467 We do this before delay slot scheduling, so that it can take our
6468 newly created instructions into account. It also allows us to
6469 find branches with common targets more easily. */
6470 static void
6471 split_branches (rtx first)
6473 rtx insn;
6474 struct far_branch **uid_branch, *far_branch_list = 0;
6475 int max_uid = get_max_uid ();
6476 int ok;
6478 /* Find out which branches are out of range. */
6479 shorten_branches (first);
6481 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6482 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6484 for (insn = first; insn; insn = NEXT_INSN (insn))
6485 if (! INSN_P (insn))
6486 continue;
6487 else if (INSN_DELETED_P (insn))
6489 /* Shorten_branches would split this instruction again,
6490 so transform it into a note. */
6491 SET_INSN_DELETED (insn);
6493 else if (JUMP_P (insn))
6495 enum attr_type type = get_attr_type (insn);
6496 if (type == TYPE_CBRANCH)
6498 rtx next, beyond;
6500 if (get_attr_length (insn) > 4)
6502 rtx src = SET_SRC (PATTERN (insn));
6503 rtx olabel = XEXP (XEXP (src, 1), 0);
6504 int addr = INSN_ADDRESSES (INSN_UID (insn));
6505 rtx label = 0;
6506 int dest_uid = get_dest_uid (olabel, max_uid);
6507 struct far_branch *bp = uid_branch[dest_uid];
6509 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6510 the label if the LABEL_NUSES count drops to zero. There is
6511 always a jump_optimize pass that sets these values, but it
6512 proceeds to delete unreferenced code, and then if not
6513 optimizing, to un-delete the deleted instructions, thus
6514 leaving labels with use counts that are too low. */
6515 if (! optimize)
6517 JUMP_LABEL (insn) = olabel;
6518 LABEL_NUSES (olabel)++;
6520 if (! bp)
6522 bp = (struct far_branch *) alloca (sizeof *bp);
6523 uid_branch[dest_uid] = bp;
6524 bp->prev = far_branch_list;
6525 far_branch_list = bp;
6526 bp->far_label
6527 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6528 LABEL_NUSES (bp->far_label)++;
6530 else
6532 label = bp->near_label;
6533 if (! label && bp->address - addr >= CONDJUMP_MIN)
6535 rtx block = bp->insert_place;
6537 if (GET_CODE (PATTERN (block)) == RETURN)
6538 block = PREV_INSN (block);
6539 else
6540 block = gen_block_redirect (block,
6541 bp->address, 2);
6542 label = emit_label_after (gen_label_rtx (),
6543 PREV_INSN (block));
6544 bp->near_label = label;
6546 else if (label && ! NEXT_INSN (label))
6548 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6549 bp->insert_place = insn;
6550 else
6551 gen_far_branch (bp);
6554 if (! label
6555 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6557 bp->near_label = label = gen_label_rtx ();
6558 bp->insert_place = insn;
6559 bp->address = addr;
6561 ok = redirect_jump (insn, label, 0);
6562 gcc_assert (ok);
6564 else
6566 /* get_attr_length (insn) == 2 */
6567 /* Check if we have a pattern where reorg wants to redirect
6568 the branch to a label from an unconditional branch that
6569 is too far away. */
6570 /* We can't use JUMP_LABEL here because it might be undefined
6571 when not optimizing. */
6572 /* A syntax error might cause beyond to be NULL_RTX. */
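/* The +252 bias and the unsigned comparisons below test, in one go,
   whether the target of the unconditional jump found below (BEYOND,
   and likewise NEXT further down) lies outside roughly [-252, +260]
   bytes of this insn -- the reach of a conditional branch (see
   CONDJUMP_MIN / CONDJUMP_MAX) plus a couple of bytes of slack.  */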
6573 beyond
6574 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6575 0));
6577 if (beyond
6578 && (JUMP_P (beyond)
6579 || ((beyond = next_active_insn (beyond))
6580 && JUMP_P (beyond)))
6581 && GET_CODE (PATTERN (beyond)) == SET
6582 && recog_memoized (beyond) == CODE_FOR_jump_compact
6583 && ((INSN_ADDRESSES
6584 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6585 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6586 > 252 + 258 + 2))
6587 gen_block_redirect (beyond,
6588 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6591 next = next_active_insn (insn);
6593 if (next
6594 && (JUMP_P (next)
6595 || ((next = next_active_insn (next))
6596 && JUMP_P (next)))
6597 && GET_CODE (PATTERN (next)) == SET
6598 && recog_memoized (next) == CODE_FOR_jump_compact
6599 && ((INSN_ADDRESSES
6600 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6601 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6602 > 252 + 258 + 2))
6603 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6605 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6607 int addr = INSN_ADDRESSES (INSN_UID (insn));
6608 rtx far_label = 0;
6609 int dest_uid = 0;
6610 struct far_branch *bp;
6612 if (type == TYPE_JUMP)
6614 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6615 dest_uid = get_dest_uid (far_label, max_uid);
6616 if (! dest_uid)
6618 /* Parse errors can lead to labels outside
6619 the insn stream. */
6620 if (! NEXT_INSN (far_label))
6621 continue;
6623 if (! optimize)
6625 JUMP_LABEL (insn) = far_label;
6626 LABEL_NUSES (far_label)++;
6628 redirect_jump (insn, ret_rtx, 1);
6629 far_label = 0;
6632 bp = uid_branch[dest_uid];
6633 if (! bp)
6635 bp = (struct far_branch *) alloca (sizeof *bp);
6636 uid_branch[dest_uid] = bp;
6637 bp->prev = far_branch_list;
6638 far_branch_list = bp;
6639 bp->near_label = 0;
6640 bp->far_label = far_label;
6641 if (far_label)
6642 LABEL_NUSES (far_label)++;
6644 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6645 if (addr - bp->address <= CONDJUMP_MAX)
6646 emit_label_after (bp->near_label, PREV_INSN (insn));
6647 else
6649 gen_far_branch (bp);
6650 bp->near_label = 0;
6652 else
6653 bp->near_label = 0;
6654 bp->address = addr;
6655 bp->insert_place = insn;
6656 if (! far_label)
6657 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6658 else
6659 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6662 /* Generate all pending far branches,
6663 and free our references to the far labels. */
6664 while (far_branch_list)
6666 if (far_branch_list->near_label
6667 && ! NEXT_INSN (far_branch_list->near_label))
6668 gen_far_branch (far_branch_list);
6669 if (optimize
6670 && far_branch_list->far_label
6671 && ! --LABEL_NUSES (far_branch_list->far_label))
6672 delete_insn (far_branch_list->far_label);
6673 far_branch_list = far_branch_list->prev;
6676 /* Instruction length information is no longer valid due to the new
6677 instructions that have been generated. */
6678 init_insn_lengths ();
6681 /* Dump out instruction addresses, which is useful for debugging the
6682 constant pool table stuff.
6684 If relaxing, output the label and pseudo-ops used to link together
6685 calls and the instruction which set the registers.
6687 ??? The addresses printed by this routine for insns are nonsense for
6688 insns which are inside of a sequence where none of the inner insns have
6689 variable length. This is because the second pass of shorten_branches
6690 does not bother to update them. */
6691 void
6692 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6693 int noperands ATTRIBUTE_UNUSED)
6695 if (TARGET_DUMPISIZE)
6696 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6698 if (TARGET_RELAX)
6700 rtx note;
6702 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6703 if (note)
6705 rtx pattern;
6707 pattern = PATTERN (insn);
6708 if (GET_CODE (pattern) == PARALLEL)
6709 pattern = XVECEXP (pattern, 0, 0);
6710 switch (GET_CODE (pattern))
6712 case SET:
6713 if (GET_CODE (SET_SRC (pattern)) != CALL
6714 && get_attr_type (insn) != TYPE_SFUNC)
6716 targetm.asm_out.internal_label
6717 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6718 break;
6720 /* else FALLTHROUGH */
6721 case CALL:
6722 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6723 CODE_LABEL_NUMBER (XEXP (note, 0)));
6724 break;
6726 default:
6727 gcc_unreachable ();
6733 /* Dump out any constants accumulated in the final pass. These will
6734 only be labels. */
6735 const char *
6736 output_jump_label_table (void)
6738 int i;
6740 if (pool_size)
6742 fprintf (asm_out_file, "\t.align 2\n");
6743 for (i = 0; i < pool_size; i++)
6745 pool_node *p = &pool_vector[i];
6747 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6748 CODE_LABEL_NUMBER (p->label));
6749 output_asm_insn (".long %O0", &p->value);
6751 pool_size = 0;
6754 return "";
6757 /* A full frame looks like:
6759 arg-5
6760 arg-4
6761 [ if current_function_anonymous_args
6762 arg-3
6763 arg-2
6764 arg-1
6765 arg-0 ]
6766 saved-fp
6767 saved-r10
6768 saved-r11
6769 saved-r12
6770 saved-pr
6771 local-n
6773 local-1
6774 local-0 <- fp points here.
6776 Number of bytes pushed for anonymous args, used to pass information
6777 between expand_prologue and expand_epilogue.
6779 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6780 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6781 for an epilogue and a negative value means that it's for a sibcall
6782 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6783 all the registers that are about to be restored, and hence dead. */
6784 static void
6785 output_stack_adjust (int size, rtx reg, int epilogue_p,
6786 HARD_REG_SET *live_regs_mask, bool frame_p)
6788 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6789 if (size)
6791 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6793 /* This test is bogus, as output_stack_adjust is used to re-align the
6794 stack. */
6795 #if 0
6796 gcc_assert (!(size % align));
6797 #endif
6799 if (CONST_OK_FOR_ADD (size))
6800 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6801 /* Try to do it with two partial adjustments; however, we must make
6802 sure that the stack is properly aligned at all times, in case
6803 an interrupt occurs between the two partial adjustments. */
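/* For example, assuming the usual 8-bit signed add immediate and a
   4-byte aligned stack, an adjustment of 240 that does not fit in a
   single add is emitted as 240 / 2 & -4 == 120 followed by
   240 - 120 == 120, so the intermediate stack pointer value stays
   4-byte aligned.  */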
6804 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6805 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6807 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6808 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6810 else
6812 rtx const_reg;
6813 rtx insn;
6814 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6815 int i;
6817 /* If TEMP is invalid, we could temporarily save a general
6818 register to MACL. However, there is currently no need
6819 to handle this case, so just die when we see it. */
6820 if (epilogue_p < 0
6821 || current_function_interrupt
6822 || ! call_really_used_regs[temp] || fixed_regs[temp])
6823 temp = -1;
6824 if (temp < 0 && ! current_function_interrupt
6825 && (TARGET_SHMEDIA || epilogue_p >= 0))
6827 HARD_REG_SET temps;
6828 COPY_HARD_REG_SET (temps, call_used_reg_set);
6829 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6830 if (epilogue_p > 0)
6832 int nreg = 0;
6833 if (crtl->return_rtx)
6835 enum machine_mode mode;
6836 mode = GET_MODE (crtl->return_rtx);
6837 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6838 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6840 for (i = 0; i < nreg; i++)
6841 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6842 if (crtl->calls_eh_return)
6844 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6845 for (i = 0; i <= 3; i++)
6846 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6849 if (TARGET_SHMEDIA && epilogue_p < 0)
6850 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6851 CLEAR_HARD_REG_BIT (temps, i);
6852 if (epilogue_p <= 0)
6854 for (i = FIRST_PARM_REG;
6855 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6856 CLEAR_HARD_REG_BIT (temps, i);
6857 if (cfun->static_chain_decl != NULL)
6858 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6860 temp = scavenge_reg (&temps);
6862 if (temp < 0 && live_regs_mask)
6864 HARD_REG_SET temps;
6866 COPY_HARD_REG_SET (temps, *live_regs_mask);
6867 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6868 temp = scavenge_reg (&temps);
6870 if (temp < 0)
6872 rtx adj_reg, tmp_reg, mem;
6874 /* If we reached here, the most likely case is the (sibcall)
6875 epilogue for non-SHmedia. Put a special push/pop sequence
6876 for such a case as the last resort. This looks lengthy but
6877 would not be a problem because it seems to be very
6878 rare. */
6880 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6883 /* ??? There is still the slight possibility that r4 or
6884 r5 have been reserved as fixed registers or assigned
6885 as global registers, and they change during an
6886 interrupt. There are possible ways to handle this:
6888 - If we are adjusting the frame pointer (r14), we can do
6889 with a single temp register and an ordinary push / pop
6890 on the stack.
6891 - Grab any call-used or call-saved registers (i.e. not
6892 fixed or globals) for the temps we need. We might
6893 also grab r14 if we are adjusting the stack pointer.
6894 If we can't find enough available registers, issue
6895 a diagnostic and die - the user must have reserved
6896 way too many registers.
6897 But since all this is rather unlikely to happen and
6898 would require extra testing, we just die if r4 / r5
6899 are not available. */
6900 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6901 && !global_regs[4] && !global_regs[5]);
6903 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6904 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6905 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6906 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6907 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6908 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6909 emit_move_insn (mem, tmp_reg);
6910 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6911 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6912 emit_move_insn (mem, tmp_reg);
6913 emit_move_insn (reg, adj_reg);
6914 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6915 emit_move_insn (adj_reg, mem);
6916 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6917 emit_move_insn (tmp_reg, mem);
6918 /* Tell flow the insns that pop r4/r5 aren't dead. */
6919 emit_use (tmp_reg);
6920 emit_use (adj_reg);
6921 return;
6923 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6925 /* If SIZE is negative, subtract the positive value.
6926 This sometimes allows a constant pool entry to be shared
6927 between prologue and epilogue code. */
6928 if (size < 0)
6930 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6931 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6933 else
6935 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6936 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6938 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6939 gen_rtx_SET (VOIDmode, reg,
6940 gen_rtx_PLUS (SImode, reg,
6941 GEN_INT (size))));
6946 /* Emit the specified insn and mark it as frame related.
6947 FIXME: Rename this to emit_frame_insn. */
6948 static rtx
6949 frame_insn (rtx x)
6951 x = emit_insn (x);
6952 RTX_FRAME_RELATED_P (x) = 1;
6953 return x;
6956 /* Output RTL to push register RN onto the stack. */
6957 static rtx
6958 push (int rn)
6960 rtx x;
6961 if (rn == FPUL_REG)
6962 x = gen_push_fpul ();
6963 else if (rn == FPSCR_REG)
6964 x = gen_push_fpscr ();
6965 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6966 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6968 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6969 return NULL_RTX;
6970 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6972 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6973 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6974 else
6975 x = gen_push (gen_rtx_REG (SImode, rn));
6977 x = frame_insn (x);
6978 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6979 return x;
6982 /* Output RTL to pop register RN from the stack. */
6983 static void
6984 pop (int rn)
6986 rtx x, sp_reg, reg;
6987 if (rn == FPUL_REG)
6988 x = gen_pop_fpul ();
6989 else if (rn == FPSCR_REG)
6990 x = gen_pop_fpscr ();
6991 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6992 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6994 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6995 return;
6996 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6998 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6999 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7000 else
7001 x = gen_pop (gen_rtx_REG (SImode, rn));
7003 x = emit_insn (x);
7005 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7006 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7007 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7008 : SET_DEST (PATTERN (x)));
7009 add_reg_note (x, REG_CFA_RESTORE, reg);
7010 add_reg_note (x, REG_CFA_ADJUST_CFA,
7011 gen_rtx_SET (SImode, sp_reg,
7012 plus_constant (SImode, sp_reg,
7013 GET_MODE_SIZE (GET_MODE (reg)))));
7014 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7015 RTX_FRAME_RELATED_P (x) = 1;
7018 /* Generate code to push the regs specified in the mask. */
7019 static void
7020 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7022 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7023 int skip_fpscr = 0;
7025 /* Push PR last; this gives better latencies after the prologue, and
7026 candidates for the return delay slot when there are no general
7027 registers pushed. */
7028 for (; i < FIRST_PSEUDO_REGISTER; i++)
7030 /* If this is an interrupt handler, and the SZ bit varies,
7031 and we have to push any floating point register, we need
7032 to switch to the correct precision first. */
7033 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7034 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7036 HARD_REG_SET unsaved;
7038 push (FPSCR_REG);
7039 COMPL_HARD_REG_SET (unsaved, *mask);
7040 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7041 skip_fpscr = 1;
7043 if (i != PR_REG
7044 && (i != FPSCR_REG || ! skip_fpscr)
7045 && TEST_HARD_REG_BIT (*mask, i))
7047 /* If the ISR has RESBANK attribute assigned, don't push any of
7048 the following registers - R0-R14, MACH, MACL and GBR. */
7049 if (! (sh_cfun_resbank_handler_p ()
7050 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7051 || i == MACH_REG
7052 || i == MACL_REG
7053 || i == GBR_REG)))
7054 push (i);
7058 /* Push banked registers last to improve delay slot opportunities. */
7059 if (interrupt_handler)
7061 bool use_movml = false;
7063 if (TARGET_SH2A)
7065 unsigned int count = 0;
7067 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7068 if (TEST_HARD_REG_BIT (*mask, i))
7069 count++;
7070 else
7071 break;
7073 /* Use movml when all banked registers are pushed. */
7074 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7075 use_movml = true;
7078 if (sh_cfun_resbank_handler_p ())
7079 ; /* Do nothing. */
7080 else if (use_movml)
7082 rtx x, mem, reg, set;
7083 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7085 /* We must avoid scheduling the multiple-store insn together
7086 with other insns. */
7087 emit_insn (gen_blockage ());
7088 x = gen_movml_push_banked (sp_reg);
7089 x = frame_insn (x);
7090 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7092 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7093 reg = gen_rtx_REG (SImode, i);
7094 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7097 set = gen_rtx_SET (SImode, sp_reg,
7098 plus_constant (Pmode, sp_reg, - 32));
7099 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7100 emit_insn (gen_blockage ());
7102 else
7103 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7104 if (TEST_HARD_REG_BIT (*mask, i))
7105 push (i);
7108 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7109 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7110 push (PR_REG);
7113 /* Calculate how much extra space is needed to save all callee-saved
7114 target registers.
7115 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7116 static int
7117 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7119 int reg;
7120 int stack_space = 0;
7121 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7123 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7124 if ((! call_really_used_regs[reg] || interrupt_handler)
7125 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7126 /* Leave space to save this target register on the stack,
7127 in case target register allocation wants to use it. */
7128 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7129 return stack_space;
7132 /* Decide whether we should reserve space for callee-save target registers,
7133 in case target register allocation wants to use them. REGS_SAVED is
7134 the space, in bytes, that is already required for register saves.
7135 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7136 static int
7137 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7138 HARD_REG_SET *live_regs_mask)
7140 if (optimize_size)
7141 return 0;
7142 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7145 /* Decide how much space to reserve for callee-save target registers
7146 in case target register allocation wants to use them.
7147 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7148 static int
7149 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7151 if (shmedia_space_reserved_for_target_registers)
7152 return shmedia_target_regs_stack_space (live_regs_mask);
7153 else
7154 return 0;
7157 /* Work out the registers which need to be saved, both as a mask and a
7158 count of saved words. Return the count.
7160 If doing a pragma interrupt function, then push all regs used by the
7161 function, and if we call another function (we can tell by looking at PR),
7162 make sure that all the regs it clobbers are safe too. */
7163 static int
7164 calc_live_regs (HARD_REG_SET *live_regs_mask)
7166 unsigned int reg;
7167 int count;
7168 tree attrs;
7169 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7170 bool nosave_low_regs;
7171 int pr_live, has_call;
7173 attrs = DECL_ATTRIBUTES (current_function_decl);
7174 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7175 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7176 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7177 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7179 CLEAR_HARD_REG_SET (*live_regs_mask);
7180 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7181 && df_regs_ever_live_p (FPSCR_REG))
7182 target_flags &= ~MASK_FPU_SINGLE;
7183 /* If we can avoid a lot of saves by switching to double mode, do that. */
7184 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7185 && TARGET_FPU_SINGLE)
7186 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7187 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7188 && (! call_really_used_regs[reg]
7189 || interrupt_handler)
7190 && ++count > 2)
7192 target_flags &= ~MASK_FPU_SINGLE;
7193 break;
7195 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7196 knows how to use it. That means the pseudo originally allocated for
7197 the initial value can become the PR_MEDIA_REG hard register, as seen for
7198 execute/20010122-1.c:test9. */
7199 if (TARGET_SHMEDIA)
7200 /* ??? this function is called from initial_elimination_offset, hence we
7201 can't use the result of sh_media_register_for_return here. */
7202 pr_live = sh_pr_n_sets ();
7203 else
7205 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7206 pr_live = (pr_initial
7207 ? (!REG_P (pr_initial)
7208 || REGNO (pr_initial) != (PR_REG))
7209 : df_regs_ever_live_p (PR_REG));
7210 /* For SHcompact, if not optimizing, we end up with a memory reference
7211 using the return address pointer for __builtin_return_address even
7212 though there is no actual need to put the PR register on the stack. */
7213 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7215 /* Force PR to be live if the prologue has to call the SHmedia
7216 argument decoder or register saver. */
7217 if (TARGET_SHCOMPACT
7218 && ((crtl->args.info.call_cookie
7219 & ~ CALL_COOKIE_RET_TRAMP (1))
7220 || crtl->saves_all_registers))
7221 pr_live = 1;
7222 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7223 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7225 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7226 ? pr_live
7227 : interrupt_handler
7228 ? (/* Need to save all the regs ever live. */
7229 (df_regs_ever_live_p (reg)
7230 || (call_really_used_regs[reg]
7231 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7232 || reg == PIC_OFFSET_TABLE_REGNUM)
7233 && has_call)
7234 || (TARGET_SHMEDIA && has_call
7235 && REGISTER_NATURAL_MODE (reg) == SImode
7236 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7237 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7238 && reg != RETURN_ADDRESS_POINTER_REGNUM
7239 && reg != T_REG && reg != GBR_REG
7240 /* Push fpscr only on targets which have an FPU. */
7241 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7242 : (/* Only push those regs which are used and need to be saved. */
7243 (TARGET_SHCOMPACT
7244 && flag_pic
7245 && crtl->args.info.call_cookie
7246 && reg == PIC_OFFSET_TABLE_REGNUM)
7247 || (df_regs_ever_live_p (reg)
7248 && ((!call_really_used_regs[reg]
7249 && !(reg != PIC_OFFSET_TABLE_REGNUM
7250 && fixed_regs[reg] && call_used_regs[reg]))
7251 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7252 || (crtl->calls_eh_return
7253 && (reg == EH_RETURN_DATA_REGNO (0)
7254 || reg == EH_RETURN_DATA_REGNO (1)
7255 || reg == EH_RETURN_DATA_REGNO (2)
7256 || reg == EH_RETURN_DATA_REGNO (3)))
7257 || ((reg == MACL_REG || reg == MACH_REG)
7258 && df_regs_ever_live_p (reg)
7259 && sh_cfun_attr_renesas_p ())
7262 SET_HARD_REG_BIT (*live_regs_mask, reg);
7263 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7265 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7266 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7268 if (FP_REGISTER_P (reg))
7270 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7272 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7273 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7276 else if (XD_REGISTER_P (reg))
7278 /* Must switch to double mode to access these registers. */
7279 target_flags &= ~MASK_FPU_SINGLE;
7283 if (nosave_low_regs && reg == R8_REG)
7284 break;
7286 /* If we have a target register optimization pass after prologue / epilogue
7287 threading, we need to assume all target registers will be live even if
7288 they aren't now. */
7289 if (flag_branch_target_load_optimize2
7290 && TARGET_SAVE_ALL_TARGET_REGS
7291 && shmedia_space_reserved_for_target_registers)
7292 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7293 if ((! call_really_used_regs[reg] || interrupt_handler)
7294 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7296 SET_HARD_REG_BIT (*live_regs_mask, reg);
7297 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7299 /* If this is an interrupt handler, we don't have any call-clobbered
7300 registers we can conveniently use for target register save/restore.
7301 Make sure we save at least one general purpose register when we need
7302 to save target registers. */
7303 if (interrupt_handler
7304 && hard_reg_set_intersect_p (*live_regs_mask,
7305 reg_class_contents[TARGET_REGS])
7306 && ! hard_reg_set_intersect_p (*live_regs_mask,
7307 reg_class_contents[GENERAL_REGS]))
7309 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7310 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7313 return count;
7316 /* Code to generate prologue and epilogue sequences */
7318 /* PUSHED is the number of bytes that are being pushed on the
7319 stack for register saves. Return the frame size, padded
7320 appropriately so that the stack stays properly aligned. */
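/* For example, with a 32-bit STACK_BOUNDARY (4-byte alignment), a
   22-byte frame and 12 bytes of register saves:
   ((22 + 12 + 3) & -4) - 12 == 24, so the 12 bytes of saves plus the
   24-byte rounded frame come to 36 bytes, a multiple of the alignment.  */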
7321 static HOST_WIDE_INT
7322 rounded_frame_size (int pushed)
7324 HOST_WIDE_INT size = get_frame_size ();
7325 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7327 if (ACCUMULATE_OUTGOING_ARGS)
7328 size += crtl->outgoing_args_size;
7330 return ((size + pushed + align - 1) & -align) - pushed;
7333 /* Choose a call-clobbered target-branch register that remains
7334 unchanged along the whole function. We set it up as the return
7335 value in the prologue. */
7336 int
7337 sh_media_register_for_return (void)
7339 int regno;
7340 int tr0_used;
7342 if (! crtl->is_leaf)
7343 return -1;
7344 if (lookup_attribute ("interrupt_handler",
7345 DECL_ATTRIBUTES (current_function_decl)))
7346 return -1;
7347 if (sh_cfun_interrupt_handler_p ())
7348 return -1;
7350 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7352 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7353 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7354 return regno;
7356 return -1;
7359 /* The maximum registers we need to save are:
7360 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7361 - 32 floating point registers (for each pair, we save none,
7362 one single precision value, or a double precision value).
7363 - 8 target registers
7364 - add 1 entry for a delimiter. */
7365 #define MAX_SAVED_REGS (62+32+8)
7367 typedef struct save_entry_s
7369 unsigned char reg;
7370 unsigned char mode;
7371 short offset;
7372 } save_entry;
7374 #define MAX_TEMPS 4
7376 /* There will be a delimiter entry with VOIDmode both at the start and the
7377 end of a filled in schedule. The end delimiter has the offset of the
7378 save with the smallest (i.e. most negative) offset. */
7379 typedef struct save_schedule_s
7381 save_entry entries[MAX_SAVED_REGS + 2];
7382 int temps[MAX_TEMPS+1];
7383 } save_schedule;
7385 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7386 use reverse order. Returns the last entry written to (not counting
7387 the delimiter). OFFSET_BASE is a number to be added to all offset
7388 entries. */
7389 static save_entry *
7390 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7391 int offset_base)
7393 int align, i;
7394 save_entry *entry = schedule->entries;
7395 int tmpx = 0;
7396 int offset;
7398 if (! current_function_interrupt)
7399 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7400 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7401 && ! FUNCTION_ARG_REGNO_P (i)
7402 && i != FIRST_RET_REG
7403 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7404 && ! (crtl->calls_eh_return
7405 && (i == EH_RETURN_STACKADJ_REGNO
7406 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7407 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7408 schedule->temps[tmpx++] = i;
7409 entry->reg = -1;
7410 entry->mode = VOIDmode;
7411 entry->offset = offset_base;
7412 entry++;
7413 /* We loop twice: first, we save 8-byte aligned registers at the
7414 higher addresses, which are known to be aligned. Then, we
7415 proceed to saving 32-bit registers that don't need 8-byte
7416 alignment.
7417 If this is an interrupt function, all registers that need saving
7418 need to be saved in full. Moreover, we need to postpone saving
7419 target registers till we have saved some general purpose registers
7420 that we can then use as scratch registers. */
7421 offset = offset_base;
7422 for (align = 1; align >= 0; align--)
7424 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7425 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7427 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7428 int reg = i;
7430 if (current_function_interrupt)
7432 if (TARGET_REGISTER_P (i))
7433 continue;
7434 if (GENERAL_REGISTER_P (i))
7435 mode = DImode;
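/* Going from the highest register downwards, we meet the odd half of a
   floating-point register pair first; if its even partner is also live
   and the FPU is not restricted to single precision, save the pair as
   one DFmode value starting at the even-numbered register.  */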
7437 if (mode == SFmode && (i % 2) == 1
7438 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7439 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7441 mode = DFmode;
7442 i--;
7443 reg--;
7446 /* If we're doing the aligned pass and this is not aligned,
7447 or we're doing the unaligned pass and this is aligned,
7448 skip it. */
7449 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7450 != align)
7451 continue;
7453 if (current_function_interrupt
7454 && GENERAL_REGISTER_P (i)
7455 && tmpx < MAX_TEMPS)
7456 schedule->temps[tmpx++] = i;
7458 offset -= GET_MODE_SIZE (mode);
7459 entry->reg = i;
7460 entry->mode = mode;
7461 entry->offset = offset;
7462 entry++;
7464 if (align && current_function_interrupt)
7465 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7466 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7468 offset -= GET_MODE_SIZE (DImode);
7469 entry->reg = i;
7470 entry->mode = DImode;
7471 entry->offset = offset;
7472 entry++;
7475 entry->reg = -1;
7476 entry->mode = VOIDmode;
7477 entry->offset = offset;
7478 schedule->temps[tmpx] = -1;
7479 return entry - 1;
7482 /* Expand code for the function prologue. */
7483 void
7484 sh_expand_prologue (void)
7486 HARD_REG_SET live_regs_mask;
7487 int d, i;
7488 int d_rounding = 0;
7489 int save_flags = target_flags;
7490 int pretend_args;
7491 int stack_usage;
7492 tree sp_switch_attr
7493 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7495 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7497 /* We have pretend args if we had an object sent partially in registers
7498 and partially on the stack, e.g. a large structure. */
7499 pretend_args = crtl->args.pretend_args_size;
7500 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7501 && (NPARM_REGS(SImode)
7502 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7503 pretend_args = 0;
7505 output_stack_adjust (-pretend_args
7506 - crtl->args.info.stack_regs * 8,
7507 stack_pointer_rtx, 0, NULL, true);
7508 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7510 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7511 /* We're going to use the PIC register to load the address of the
7512 incoming-argument decoder and/or of the return trampoline from
7513 the GOT, so make sure the PIC register is preserved and
7514 initialized. */
7515 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7517 if (TARGET_SHCOMPACT
7518 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7520 int reg;
7522 /* First, make all registers with incoming arguments that will
7523 be pushed onto the stack live, so that register renaming
7524 doesn't overwrite them. */
7525 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7526 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7527 >= NPARM_REGS (SImode) - reg)
7528 for (; reg < NPARM_REGS (SImode); reg++)
7529 emit_insn (gen_shcompact_preserve_incoming_args
7530 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7531 else if (CALL_COOKIE_INT_REG_GET
7532 (crtl->args.info.call_cookie, reg) == 1)
7533 emit_insn (gen_shcompact_preserve_incoming_args
7534 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7536 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7537 stack_pointer_rtx);
7538 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7539 GEN_INT (crtl->args.info.call_cookie));
7540 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7541 gen_rtx_REG (SImode, R0_REG));
7543 else if (TARGET_SHMEDIA)
7545 int tr = sh_media_register_for_return ();
7547 if (tr >= 0)
7548 emit_move_insn (gen_rtx_REG (DImode, tr),
7549 gen_rtx_REG (DImode, PR_MEDIA_REG));
7552 /* Emit the code for SETUP_VARARGS. */
7553 if (cfun->stdarg)
7555 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7557 /* Push arg regs as if they'd been provided by the caller on the stack. */
7558 for (i = 0; i < NPARM_REGS(SImode); i++)
7560 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7562 if (i >= (NPARM_REGS(SImode)
7563 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7565 break;
7566 push (rn);
7567 stack_usage += GET_MODE_SIZE (SImode);
7572 /* If we're supposed to switch stacks at function entry, do so now. */
7573 if (sp_switch_attr)
7575 rtx lab, newsrc;
7576 /* The argument specifies a variable holding the address of the
7577 stack the interrupt function should switch to/from at entry/exit. */
7578 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7579 const char *s
7580 = ggc_strdup (TREE_STRING_POINTER (arg));
7581 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7583 lab = add_constant (sp_switch, SImode, 0);
7584 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7586 emit_insn (gen_sp_switch_1 (newsrc));
7589 d = calc_live_regs (&live_regs_mask);
7590 /* ??? Maybe we could save some switching if we can move a mode switch
7591 that already happens to be at the function start into the prologue. */
7592 if (target_flags != save_flags && ! current_function_interrupt)
7593 emit_insn (gen_toggle_sz ());
7595 if (TARGET_SH5)
7597 int offset_base, offset;
7598 rtx r0 = NULL_RTX;
7599 int offset_in_r0 = -1;
7600 int sp_in_r0 = 0;
7601 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7602 int total_size, save_size;
7603 save_schedule schedule;
7604 save_entry *entry;
7605 int *tmp_pnt;
7607 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7608 && ! current_function_interrupt)
7609 r0 = gen_rtx_REG (Pmode, R0_REG);
7611 /* D is the actual number of bytes that we need for saving registers;
7612 however, in initial_elimination_offset we have committed to using
7613 an additional TREGS_SPACE amount of bytes - in order to keep both
7614 addresses to arguments supplied by the caller and local variables
7615 valid, we must keep this gap. Place it between the incoming
7616 arguments and the actually saved registers in a bid to optimize
7617 locality of reference. */
7618 total_size = d + tregs_space;
7619 total_size += rounded_frame_size (total_size);
7620 save_size = total_size - rounded_frame_size (d);
7621 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7622 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7623 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7625 /* If adjusting the stack in a single step costs nothing extra, do so.
7626 I.e. either if a single addi is enough, or we need a movi anyway,
7627 and we don't exceed the maximum offset range (the test for the
7628 latter is conservative for simplicity). */
7629 if (TARGET_SHMEDIA
7630 && (CONST_OK_FOR_I10 (-total_size)
7631 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7632 && total_size <= 2044)))
7633 d_rounding = total_size - save_size;
7635 offset_base = d + d_rounding;
7637 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7638 0, NULL, true);
7639 stack_usage += save_size + d_rounding;
7641 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7642 tmp_pnt = schedule.temps;
7643 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7645 enum machine_mode mode = (enum machine_mode) entry->mode;
7646 unsigned int reg = entry->reg;
7647 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7648 rtx orig_reg_rtx;
7650 offset = entry->offset;
7652 reg_rtx = gen_rtx_REG (mode, reg);
7654 mem_rtx = gen_frame_mem (mode,
7655 gen_rtx_PLUS (Pmode,
7656 stack_pointer_rtx,
7657 GEN_INT (offset)));
7659 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7661 gcc_assert (r0);
7662 mem_rtx = NULL_RTX;
7665 if (HAVE_PRE_DECREMENT
7666 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7667 || mem_rtx == NULL_RTX
7668 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7670 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7672 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7673 pre_dec = NULL_RTX;
7674 else
7676 mem_rtx = NULL_RTX;
7677 offset += GET_MODE_SIZE (mode);
7681 if (mem_rtx != NULL_RTX)
7682 goto addr_ok;
7684 if (offset_in_r0 == -1)
7686 emit_move_insn (r0, GEN_INT (offset));
7687 offset_in_r0 = offset;
7689 else if (offset != offset_in_r0)
7691 emit_move_insn (r0,
7692 gen_rtx_PLUS
7693 (Pmode, r0,
7694 GEN_INT (offset - offset_in_r0)));
7695 offset_in_r0 += offset - offset_in_r0;
7698 if (pre_dec != NULL_RTX)
7700 if (! sp_in_r0)
7702 emit_move_insn (r0,
7703 gen_rtx_PLUS
7704 (Pmode, r0, stack_pointer_rtx));
7705 sp_in_r0 = 1;
7708 offset -= GET_MODE_SIZE (mode);
7709 offset_in_r0 -= GET_MODE_SIZE (mode);
7711 mem_rtx = pre_dec;
7713 else if (sp_in_r0)
7714 mem_rtx = gen_frame_mem (mode, r0);
7715 else
7716 mem_rtx = gen_frame_mem (mode,
7717 gen_rtx_PLUS (Pmode,
7718 stack_pointer_rtx,
7719 r0));
7721 /* We must not use an r0-based address for target-branch
7722 registers or for special registers without pre-dec
7723 memory addresses, since we store their values in r0
7724 first. */
7725 gcc_assert (!TARGET_REGISTER_P (reg)
7726 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7727 || mem_rtx == pre_dec));
7729 addr_ok:
7730 orig_reg_rtx = reg_rtx;
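/* Target-branch registers, and PR/special registers that did not get a
   pre-decrement address, cannot be stored to the frame directly; stage
   their value through one of the scratch registers collected in
   schedule.temps, cycling through the temporaries round-robin.  */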
7731 if (TARGET_REGISTER_P (reg)
7732 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7733 && mem_rtx != pre_dec))
7735 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7737 emit_move_insn (tmp_reg, reg_rtx);
7739 if (REGNO (tmp_reg) == R0_REG)
7741 offset_in_r0 = -1;
7742 sp_in_r0 = 0;
7743 gcc_assert (!refers_to_regno_p
7744 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7747 if (*++tmp_pnt <= 0)
7748 tmp_pnt = schedule.temps;
7750 reg_rtx = tmp_reg;
7753 rtx insn;
7755 /* Mark as interesting for the DWARF CFI generator. */
7756 insn = emit_move_insn (mem_rtx, reg_rtx);
7757 RTX_FRAME_RELATED_P (insn) = 1;
7758 /* If we use an intermediate register for the save, we can't
7759 describe this exactly in cfi as a copy of the to-be-saved
7760 register into the temporary register followed by a store of the
7761 temporary register to the stack, because the temporary register can
7762 have a different natural size than the to-be-saved register.
7763 Thus, we gloss over the intermediate copy and pretend we do
7764 a direct save from the to-be-saved register. */
7765 if (REGNO (reg_rtx) != reg)
7767 rtx set;
7769 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7770 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7773 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7775 rtx reg_rtx = gen_rtx_REG (mode, reg);
7776 rtx set;
7777 rtx mem_rtx = gen_frame_mem (mode,
7778 gen_rtx_PLUS (Pmode,
7779 stack_pointer_rtx,
7780 GEN_INT (offset)));
7782 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7783 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7788 gcc_assert (entry->offset == d_rounding);
7790 else
7792 push_regs (&live_regs_mask, current_function_interrupt);
7793 stack_usage += d;
7796 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7797 emit_insn (gen_GOTaddr2picreg ());
7799 if (SHMEDIA_REGS_STACK_ADJUST ())
7801 /* This must NOT go through the PLT, otherwise mach and macl
7802 may be clobbered. */
7803 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7804 (TARGET_FPU_ANY
7805 ? "__GCC_push_shmedia_regs"
7806 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7807 emit_insn (gen_shmedia_save_restore_regs_compact
7808 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7811 if (target_flags != save_flags && ! current_function_interrupt)
7812 emit_insn (gen_toggle_sz ());
7814 target_flags = save_flags;
7816 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7817 stack_pointer_rtx, 0, NULL, true);
7818 stack_usage += rounded_frame_size (d) - d_rounding;
7820 if (frame_pointer_needed)
7821 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7823 if (TARGET_SHCOMPACT
7824 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7826 /* This must NOT go through the PLT, otherwise mach and macl
7827 may be clobbered. */
7828 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7829 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7830 emit_insn (gen_shcompact_incoming_args ());
7833 /* If we are profiling, make sure no instructions are scheduled before
7834 the call to mcount. Similarly if some call instructions are swapped
7835 before frame related insns, it'll confuse the unwinder because
7836 currently SH has no unwind info for function epilogues. */
7837 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7838 emit_insn (gen_blockage ());
7840 if (flag_stack_usage_info)
7841 current_function_static_stack_size = stack_usage;
7844 /* Expand code for the function epilogue. */
7845 void
7846 sh_expand_epilogue (bool sibcall_p)
7848 HARD_REG_SET live_regs_mask;
7849 int d, i;
7850 int d_rounding = 0;
7852 int save_flags = target_flags;
7853 int frame_size, save_size;
7854 int fpscr_deferred = 0;
7855 int e = sibcall_p ? -1 : 1;
7857 d = calc_live_regs (&live_regs_mask);
7859 save_size = d;
7860 frame_size = rounded_frame_size (d);
7862 if (TARGET_SH5)
7864 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7865 int total_size;
7866 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7867 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7868 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7870 total_size = d + tregs_space;
7871 total_size += rounded_frame_size (total_size);
7872 save_size = total_size - frame_size;
7874 /* If adjusting the stack in a single step costs nothing extra, do so.
7875 I.e. either if a single addi is enough, or we need a movi anyway,
7876 and we don't exceed the maximum offset range (the test for the
7877 latter is conservative for simplicity). */
7878 if (TARGET_SHMEDIA
7879 && ! frame_pointer_needed
7880 && (CONST_OK_FOR_I10 (total_size)
7881 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7882 && total_size <= 2044)))
7883 d_rounding = frame_size;
7885 frame_size -= d_rounding;
7888 if (frame_pointer_needed)
7890 /* We must avoid scheduling the epilogue with previous basic blocks.
7891 See PR/18032 and PR/40313. */
7892 emit_insn (gen_blockage ());
7893 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7894 &live_regs_mask, true);
7896 /* We must avoid moving the stack pointer adjustment past code
7897 which reads from the local frame, else an interrupt could
7898 occur after the SP adjustment and clobber data in the local
7899 frame. */
7900 emit_insn (gen_blockage ());
7901 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7903 else if (frame_size)
7905 /* We must avoid moving the stack pointer adjustment past code
7906 which reads from the local frame, else an interrupt could
7907 occur after the SP adjustment and clobber data in the local
7908 frame. */
7909 emit_insn (gen_blockage ());
7910 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7911 &live_regs_mask, true);
7914 if (SHMEDIA_REGS_STACK_ADJUST ())
7916 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7917 (TARGET_FPU_ANY
7918 ? "__GCC_pop_shmedia_regs"
7919 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7920 /* This must NOT go through the PLT, otherwise mach and macl
7921 may be clobbered. */
7922 emit_insn (gen_shmedia_save_restore_regs_compact
7923 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7926 /* Pop all the registers. */
7928 if (target_flags != save_flags && ! current_function_interrupt)
7929 emit_insn (gen_toggle_sz ());
7930 if (TARGET_SH5)
7932 int offset_base, offset;
7933 int offset_in_r0 = -1;
7934 int sp_in_r0 = 0;
7935 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7936 save_schedule schedule;
7937 save_entry *entry;
7938 int *tmp_pnt;
7940 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7941 offset_base = -entry[1].offset + d_rounding;
7942 tmp_pnt = schedule.temps;
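/* The schedule lists the saves with decreasing offsets; walk it
   backwards, from the entry written last up to the start delimiter,
   restoring each register from its stack slot.  */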
7943 for (; entry->mode != VOIDmode; entry--)
7945 enum machine_mode mode = (enum machine_mode) entry->mode;
7946 int reg = entry->reg;
7947 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7949 offset = offset_base + entry->offset;
7950 reg_rtx = gen_rtx_REG (mode, reg);
7952 mem_rtx = gen_frame_mem (mode,
7953 gen_rtx_PLUS (Pmode,
7954 stack_pointer_rtx,
7955 GEN_INT (offset)));
7957 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7958 mem_rtx = NULL_RTX;
7960 if (HAVE_POST_INCREMENT
7961 && (offset == offset_in_r0
7962 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7963 && mem_rtx == NULL_RTX)
7964 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7966 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7968 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7969 post_inc = NULL_RTX;
7970 else
7971 mem_rtx = NULL_RTX;
7974 if (mem_rtx != NULL_RTX)
7975 goto addr_ok;
7977 if (offset_in_r0 == -1)
7979 emit_move_insn (r0, GEN_INT (offset));
7980 offset_in_r0 = offset;
7982 else if (offset != offset_in_r0)
7984 emit_move_insn (r0,
7985 gen_rtx_PLUS
7986 (Pmode, r0,
7987 GEN_INT (offset - offset_in_r0)));
7988 offset_in_r0 += offset - offset_in_r0;
7991 if (post_inc != NULL_RTX)
7993 if (! sp_in_r0)
7995 emit_move_insn (r0,
7996 gen_rtx_PLUS
7997 (Pmode, r0, stack_pointer_rtx));
7998 sp_in_r0 = 1;
8001 mem_rtx = post_inc;
8003 offset_in_r0 += GET_MODE_SIZE (mode);
8005 else if (sp_in_r0)
8006 mem_rtx = gen_frame_mem (mode, r0);
8007 else
8008 mem_rtx = gen_frame_mem (mode,
8009 gen_rtx_PLUS (Pmode,
8010 stack_pointer_rtx,
8011 r0));
8013 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8014 || mem_rtx == post_inc);
8016 addr_ok:
8017 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8018 && mem_rtx != post_inc)
8020 emit_move_insn (r0, mem_rtx);
8021 mem_rtx = r0;
8023 else if (TARGET_REGISTER_P (reg))
8025 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8027 /* Give the scheduler a bit of freedom by using up to
8028 MAX_TEMPS registers in a round-robin fashion. */
8029 emit_move_insn (tmp_reg, mem_rtx);
8030 mem_rtx = tmp_reg;
8031 if (*++tmp_pnt < 0)
8032 tmp_pnt = schedule.temps;
8035 emit_move_insn (reg_rtx, mem_rtx);
8038 gcc_assert (entry->offset + offset_base == d + d_rounding);
8040 else /* ! TARGET_SH5 */
8042 int last_reg;
8044 save_size = 0;
8045 /* For an ISR with RESBANK attribute assigned, don't pop PR
8046 register. */
8047 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8048 && !sh_cfun_resbank_handler_p ())
8050 if (!frame_pointer_needed)
8051 emit_insn (gen_blockage ());
8052 pop (PR_REG);
8055 /* Banked registers are popped first to avoid being scheduled in the
8056 delay slot. RTE switches banks before the ds instruction. */
8057 if (current_function_interrupt)
8059 bool use_movml = false;
8061 if (TARGET_SH2A)
8063 unsigned int count = 0;
8065 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8066 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8067 count++;
8068 else
8069 break;
8071 /* Use movml when all banked registers are popped. */
8072 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8073 use_movml = true;
8076 if (sh_cfun_resbank_handler_p ())
8077 ; /* Do nothing. */
8078 else if (use_movml)
8080 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8082 /* We must avoid scheduling the multiple-register load insn
8083 together with other insns. */
8084 emit_insn (gen_blockage ());
8085 emit_insn (gen_movml_pop_banked (sp_reg));
8086 emit_insn (gen_blockage ());
8088 else
8089 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8090 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8091 pop (i);
8093 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8095 else
8096 last_reg = FIRST_PSEUDO_REGISTER;
8098 for (i = 0; i < last_reg; i++)
8100 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8102 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8103 && hard_reg_set_intersect_p (live_regs_mask,
8104 reg_class_contents[DF_REGS]))
8105 fpscr_deferred = 1;
8106 /* For an ISR with RESBANK attribute assigned, don't pop
8107 following registers, R0-R14, MACH, MACL and GBR. */
8108 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8109 && ! (sh_cfun_resbank_handler_p ()
8110 && ((j >= FIRST_GENERAL_REG
8111 && j < LAST_GENERAL_REG)
8112 || j == MACH_REG
8113 || j == MACL_REG
8114 || j == GBR_REG)))
8115 pop (j);
8117 if (j == FIRST_FP_REG && fpscr_deferred)
8118 pop (FPSCR_REG);
8121 if (target_flags != save_flags && ! current_function_interrupt)
8122 emit_insn (gen_toggle_sz ());
8123 target_flags = save_flags;
8125 output_stack_adjust (crtl->args.pretend_args_size
8126 + save_size + d_rounding
8127 + crtl->args.info.stack_regs * 8,
8128 stack_pointer_rtx, e, NULL, true);
8130 if (crtl->calls_eh_return)
8131 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8132 EH_RETURN_STACKADJ_RTX));
8134 /* Switch back to the normal stack if necessary. */
8135 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8136 emit_insn (gen_sp_switch_2 ());
8138 /* Tell flow the insn that pops PR isn't dead. */
8139 /* PR_REG will never be live in SHmedia mode, and we don't need to
8140 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8141 by the return pattern. */
8142 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8143 emit_use (gen_rtx_REG (SImode, PR_REG));
8146 /* Emit code to change the current function's return address to RA.
8147 TMP is available as a scratch register, if needed. */
8148 void
8149 sh_set_return_address (rtx ra, rtx tmp)
8151 HARD_REG_SET live_regs_mask;
8152 int d;
8153 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8154 int pr_offset;
8156 d = calc_live_regs (&live_regs_mask);
8158 /* If pr_reg isn't live, we can set it (or the register given in
8159 sh_media_register_for_return) directly. */
8160 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8162 rtx rr;
8164 if (TARGET_SHMEDIA)
8166 int rr_regno = sh_media_register_for_return ();
8168 if (rr_regno < 0)
8169 rr_regno = pr_reg;
8171 rr = gen_rtx_REG (DImode, rr_regno);
8173 else
8174 rr = gen_rtx_REG (SImode, pr_reg);
8176 emit_insn (GEN_MOV (rr, ra));
8177 /* Tell flow the register for return isn't dead. */
8178 emit_use (rr);
8179 return;
8182 if (TARGET_SH5)
8184 int offset;
8185 save_schedule schedule;
8186 save_entry *entry;
8188 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8189 offset = entry[1].offset;
8190 for (; entry->mode != VOIDmode; entry--)
8191 if (entry->reg == pr_reg)
8192 goto found;
8194 /* We can't find the PR register. */
8195 gcc_unreachable ();
8197 found:
8198 offset = entry->offset - offset;
8199 pr_offset = (rounded_frame_size (d) + offset
8200 + SHMEDIA_REGS_STACK_ADJUST ());
8202 else
8203 pr_offset = rounded_frame_size (d);
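/* pr_offset is now the distance from the frame or stack pointer to the
   slot holding the saved return address; form that address and store
   RA into it.  */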
8205 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8207 if (frame_pointer_needed)
8208 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8209 else
8210 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8212 tmp = gen_frame_mem (Pmode, tmp);
8213 emit_insn (GEN_MOV (tmp, ra));
8214 /* Tell flow this store isn't dead. */
8215 emit_use (tmp);
8218 /* Clear variables at function end. */
8219 static void
8220 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8221 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8225 static rtx
8226 sh_builtin_saveregs (void)
8228 /* First unnamed integer register. */
8229 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8230 /* Number of integer registers we need to save. */
8231 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8232 /* First unnamed SFmode float reg */
8233 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8234 /* Number of SFmode float regs to save. */
8235 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8236 rtx regbuf, fpregs;
8237 int bufsize, regno;
8238 alias_set_type alias_set;
8240 if (TARGET_SH5)
8242 if (n_intregs)
8244 int pushregs = n_intregs;
8246 while (pushregs < NPARM_REGS (SImode) - 1
8247 && (CALL_COOKIE_INT_REG_GET
8248 (crtl->args.info.call_cookie,
8249 NPARM_REGS (SImode) - pushregs)
8250 == 1))
8252 crtl->args.info.call_cookie
8253 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8254 - pushregs, 1);
8255 pushregs++;
8258 if (pushregs == NPARM_REGS (SImode))
8259 crtl->args.info.call_cookie
8260 |= (CALL_COOKIE_INT_REG (0, 1)
8261 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8262 else
8263 crtl->args.info.call_cookie
8264 |= CALL_COOKIE_STACKSEQ (pushregs);
8266 crtl->args.pretend_args_size += 8 * n_intregs;
8268 if (TARGET_SHCOMPACT)
8269 return const0_rtx;
8272 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8274 error ("__builtin_saveregs not supported by this subtarget");
8275 return const0_rtx;
8278 if (TARGET_SHMEDIA)
8279 n_floatregs = 0;
8281 /* Allocate block of memory for the regs. */
8282 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8283 Or can assign_stack_local accept a 0 SIZE argument? */
8284 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8286 if (TARGET_SHMEDIA)
8287 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8288 else if (n_floatregs & 1)
8290 rtx addr;
8292 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8293 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8294 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8295 regbuf = change_address (regbuf, BLKmode, addr);
8297 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8299 rtx addr, mask;
8301 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8302 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8303 XEXP (regbuf, 0), 4));
8304 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8305 emit_insn (gen_andsi3 (addr, addr, mask));
8306 regbuf = change_address (regbuf, BLKmode, addr);
8308 else
8309 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8310 alias_set = get_varargs_alias_set ();
8311 set_mem_alias_set (regbuf, alias_set);
8313 /* Save int args.
8314 This is optimized to only save the regs that are necessary. Explicitly
8315 named args need not be saved. */
8316 if (n_intregs > 0)
8317 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8318 adjust_address (regbuf, BLKmode,
8319 n_floatregs * UNITS_PER_WORD),
8320 n_intregs);
8322 if (TARGET_SHMEDIA)
8323 /* Return the address of the regbuf. */
8324 return XEXP (regbuf, 0);
8326 /* Save float args.
8327 This is optimized to only save the regs that are necessary. Explicitly
8328 named args need not be saved.
8329 We explicitly build a pointer to the buffer because it halves the insn
8330 count when not optimizing (otherwise the pointer is built for each reg
8331 saved).
8332 We emit the moves in reverse order so that we can use predecrement. */
8334 fpregs = copy_to_mode_reg (Pmode,
8335 plus_constant (Pmode, XEXP (regbuf, 0),
8336 n_floatregs * UNITS_PER_WORD));
8337 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8339 rtx mem;
8340 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8342 emit_insn (gen_addsi3 (fpregs, fpregs,
8343 GEN_INT (-2 * UNITS_PER_WORD)));
8344 mem = change_address (regbuf, DFmode, fpregs);
8345 emit_move_insn (mem,
8346 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8348 regno = first_floatreg;
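/* An odd first_floatreg means one single-precision value was left
   uncovered by the DFmode pair stores above; save it on its own.  */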
8349 if (regno & 1)
8351 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8352 mem = change_address (regbuf, SFmode, fpregs);
8353 emit_move_insn (mem,
8354 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8355 + regno - SH_REG_MSW_OFFSET));
8358 else
8359 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8361 rtx mem;
8363 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8364 mem = change_address (regbuf, SFmode, fpregs);
8365 emit_move_insn (mem,
8366 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8369 /* Return the address of the regbuf. */
8370 return XEXP (regbuf, 0);
8373 /* Define the `__builtin_va_list' type for the ABI. */
8374 static tree
8375 sh_build_builtin_va_list (void)
8377 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8378 tree record, type_decl;
8380 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8381 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8382 return ptr_type_node;
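/* Otherwise build a record of five pointers tracking the integer and
   floating-point register save areas (current position and limit for
   each) plus the overflow area on the stack.  */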
8384 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8385 type_decl = build_decl (BUILTINS_LOCATION,
8386 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8388 f_next_o = build_decl (BUILTINS_LOCATION,
8389 FIELD_DECL, get_identifier ("__va_next_o"),
8390 ptr_type_node);
8391 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8392 FIELD_DECL,
8393 get_identifier ("__va_next_o_limit"),
8394 ptr_type_node);
8395 f_next_fp = build_decl (BUILTINS_LOCATION,
8396 FIELD_DECL, get_identifier ("__va_next_fp"),
8397 ptr_type_node);
8398 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8399 FIELD_DECL,
8400 get_identifier ("__va_next_fp_limit"),
8401 ptr_type_node);
8402 f_next_stack = build_decl (BUILTINS_LOCATION,
8403 FIELD_DECL, get_identifier ("__va_next_stack"),
8404 ptr_type_node);
8406 DECL_FIELD_CONTEXT (f_next_o) = record;
8407 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8408 DECL_FIELD_CONTEXT (f_next_fp) = record;
8409 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8410 DECL_FIELD_CONTEXT (f_next_stack) = record;
8412 TYPE_STUB_DECL (record) = type_decl;
8413 TYPE_NAME (record) = type_decl;
8414 TYPE_FIELDS (record) = f_next_o;
8415 DECL_CHAIN (f_next_o) = f_next_o_limit;
8416 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8417 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8418 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8420 layout_type (record);
8422 return record;
8425 /* Implement `va_start' for varargs and stdarg. */
8426 static void
8427 sh_va_start (tree valist, rtx nextarg)
8429 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8430 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8431 tree t, u;
8432 int nfp, nint;
8434 if (TARGET_SH5)
8436 expand_builtin_saveregs ();
8437 std_expand_builtin_va_start (valist, nextarg);
8438 return;
8441 if ((! TARGET_SH2E && ! TARGET_SH4)
8442 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8444 std_expand_builtin_va_start (valist, nextarg);
8445 return;
8448 f_next_o = TYPE_FIELDS (va_list_type_node);
8449 f_next_o_limit = DECL_CHAIN (f_next_o);
8450 f_next_fp = DECL_CHAIN (f_next_o_limit);
8451 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8452 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8454 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8455 NULL_TREE);
8456 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8457 valist, f_next_o_limit, NULL_TREE);
8458 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8459 NULL_TREE);
8460 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8461 valist, f_next_fp_limit, NULL_TREE);
8462 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8463 valist, f_next_stack, NULL_TREE);
8465 /* Call __builtin_saveregs. */
8466 u = make_tree (sizetype, expand_builtin_saveregs ());
8467 u = fold_convert (ptr_type_node, u);
8468 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8469 TREE_SIDE_EFFECTS (t) = 1;
8470 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
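/* The float save area is followed immediately by the integer save
   area, so next_fp_limit and next_o both end up pointing just past
   the slots reserved for the still-unnamed float argument registers.  */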
8472 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8473 if (nfp < 8)
8474 nfp = 8 - nfp;
8475 else
8476 nfp = 0;
8477 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8478 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8479 TREE_SIDE_EFFECTS (t) = 1;
8480 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8482 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8483 TREE_SIDE_EFFECTS (t) = 1;
8484 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8486 nint = crtl->args.info.arg_count[SH_ARG_INT];
8487 if (nint < 4)
8488 nint = 4 - nint;
8489 else
8490 nint = 0;
8491 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8492 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8493 TREE_SIDE_EFFECTS (t) = 1;
8494 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8496 u = make_tree (ptr_type_node, nextarg);
8497 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8498 TREE_SIDE_EFFECTS (t) = 1;
8499 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8502 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8503 member, return it. */
8504 static tree
8505 find_sole_member (tree type)
8507 tree field, member = NULL_TREE;
8509 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8511 if (TREE_CODE (field) != FIELD_DECL)
8512 continue;
8513 if (!DECL_SIZE (field))
8514 return NULL_TREE;
8515 if (integer_zerop (DECL_SIZE (field)))
8516 continue;
8517 if (member)
8518 return NULL_TREE;
8519 member = field;
8521 return member;
8524 /* Implement `va_arg'. */
8525 static tree
8526 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8527 gimple_seq *post_p ATTRIBUTE_UNUSED)
8529 HOST_WIDE_INT size, rsize;
8530 tree tmp, pptr_type_node;
8531 tree addr, lab_over = NULL, result = NULL;
8532 bool pass_by_ref;
8533 tree eff_type;
8535 if (!VOID_TYPE_P (type))
8536 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8537 else
8538 pass_by_ref = false;
8540 if (pass_by_ref)
8541 type = build_pointer_type (type);
8543 size = int_size_in_bytes (type);
8544 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8545 pptr_type_node = build_pointer_type (ptr_type_node);
8547 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8548 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8550 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8551 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8552 int pass_as_float;
8553 tree lab_false;
8554 tree member;
8556 f_next_o = TYPE_FIELDS (va_list_type_node);
8557 f_next_o_limit = DECL_CHAIN (f_next_o);
8558 f_next_fp = DECL_CHAIN (f_next_o_limit);
8559 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8560 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8562 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8563 NULL_TREE);
8564 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8565 valist, f_next_o_limit, NULL_TREE);
8566 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8567 valist, f_next_fp, NULL_TREE);
8568 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8569 valist, f_next_fp_limit, NULL_TREE);
8570 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8571 valist, f_next_stack, NULL_TREE);
8573 /* Structures with a single member with a distinct mode are passed
8574 like their member. This is relevant if the latter has a REAL_TYPE
8575 or COMPLEX_TYPE type. */
8576 eff_type = type;
8577 while (TREE_CODE (eff_type) == RECORD_TYPE
8578 && (member = find_sole_member (eff_type))
8579 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8580 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8581 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8583 tree field_type = TREE_TYPE (member);
8585 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8586 eff_type = field_type;
8587 else
8589 gcc_assert ((TYPE_ALIGN (eff_type)
8590 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8591 || (TYPE_ALIGN (eff_type)
8592 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8593 break;
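/* Decide whether the value is fetched from the FP register save area:
   with a double-precision FPU this covers reals up to 8 bytes and
   complex reals up to 16 bytes, otherwise only 4-byte reals.  */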
8597 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8599 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8600 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8601 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8602 && size <= 16));
8604 else
8606 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8609 addr = create_tmp_var (pptr_type_node, NULL);
8610 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8611 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8613 valist = build_simple_mem_ref (addr);
8615 if (pass_as_float)
8617 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8618 tree cmp;
8619 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8621 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8622 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8624 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8625 tmp = next_fp_limit;
8626 if (size > 4 && !is_double)
8627 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8628 tmp = build2 (GE_EXPR, boolean_type_node,
8629 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8630 cmp = build3 (COND_EXPR, void_type_node, tmp,
8631 build1 (GOTO_EXPR, void_type_node,
8632 unshare_expr (lab_false)), NULL_TREE);
8633 if (!is_double)
8634 gimplify_and_add (cmp, pre_p);
8636 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8637 || (is_double || size == 16))
8639 tmp = fold_convert (sizetype, next_fp_tmp);
8640 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8641 size_int (UNITS_PER_WORD));
8642 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8643 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8645 if (is_double)
8646 gimplify_and_add (cmp, pre_p);
8648 #ifdef FUNCTION_ARG_SCmode_WART
8649 if (TYPE_MODE (eff_type) == SCmode
8650 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8652 tree subtype = TREE_TYPE (eff_type);
8653 tree real, imag;
8655 imag
8656 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8657 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8659 real
8660 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8661 real = get_initialized_tmp_var (real, pre_p, NULL);
8663 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8664 if (type != eff_type)
8665 result = build1 (VIEW_CONVERT_EXPR, type, result);
8666 result = get_initialized_tmp_var (result, pre_p, NULL);
8668 #endif /* FUNCTION_ARG_SCmode_WART */
8670 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8671 gimplify_and_add (tmp, pre_p);
8673 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8674 gimplify_and_add (tmp, pre_p);
8676 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8677 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8678 gimplify_assign (unshare_expr (next_fp_tmp),
8679 unshare_expr (valist), pre_p);
8681 gimplify_assign (unshare_expr (valist),
8682 unshare_expr (next_fp_tmp), post_p);
8683 valist = next_fp_tmp;
8685 else
8687 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8688 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8689 unshare_expr (next_o_limit));
8690 tmp = build3 (COND_EXPR, void_type_node, tmp,
8691 build1 (GOTO_EXPR, void_type_node,
8692 unshare_expr (lab_false)),
8693 NULL_TREE);
8694 gimplify_and_add (tmp, pre_p);
8696 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8697 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8699 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8700 gimplify_and_add (tmp, pre_p);
8702 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8703 gimplify_and_add (tmp, pre_p);
8705 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8706 gimplify_assign (unshare_expr (next_o),
8707 unshare_expr (next_o_limit), pre_p);
8709 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8710 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8713 if (!result)
8715 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8716 gimplify_and_add (tmp, pre_p);
8720 /* ??? In va-sh.h, there had been code to make values larger than
8721 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8723 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8724 if (result)
8726 gimplify_assign (result, tmp, pre_p);
8727 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8728 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8729 gimplify_and_add (tmp, pre_p);
8731 else
8732 result = tmp;
8734 if (pass_by_ref)
8735 result = build_va_arg_indirect_ref (result);
8737 return result;
8740 /* 64-bit floating point memory transfers are paired single precision loads
8741 or stores. So DWARF information needs fixing in little endian mode (unless
8742 PR=SZ=1 in FPSCR). */
8744 sh_dwarf_register_span (rtx reg)
8746 unsigned regno = REGNO (reg);
8748 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8749 return NULL_RTX;
8751 return
8752 gen_rtx_PARALLEL (VOIDmode,
8753 gen_rtvec (2,
8754 gen_rtx_REG (SFmode, regno + 1),
8755 gen_rtx_REG (SFmode, regno)));
8758 static enum machine_mode
8759 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8760 int *punsignedp, const_tree funtype,
8761 int for_return)
8763 if (sh_promote_prototypes (funtype))
8764 return promote_mode (type, mode, punsignedp);
8765 else
8766 return default_promote_function_mode (type, mode, punsignedp, funtype,
8767 for_return);
8770 static bool
8771 sh_promote_prototypes (const_tree type)
8773 if (TARGET_HITACHI)
8774 return false;
8775 if (! type)
8776 return true;
8777 return ! sh_attr_renesas_p (type);
8780 /* Whether an argument must be passed by reference. On SHcompact, we
8781 pretend that arguments wider than 32 bits that would have been passed in
8782 registers are passed by reference, so that an SHmedia trampoline
8783 loads them into the full 64-bit registers. */
8784 static int
8785 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8786 const_tree type, bool named)
8788 unsigned HOST_WIDE_INT size;
8790 if (type)
8791 size = int_size_in_bytes (type);
8792 else
8793 size = GET_MODE_SIZE (mode);
8795 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8796 && (!named
8797 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8798 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8799 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8800 && size > 4
8801 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8802 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8803 return size;
8804 else
8805 return 0;
8808 static bool
8809 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8810 const_tree type, bool named)
8812 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8814 if (targetm.calls.must_pass_in_stack (mode, type))
8815 return true;
8817 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8818 wants to know about pass-by-reference semantics for incoming
8819 arguments. */
8820 if (! cum)
8821 return false;
8823 if (TARGET_SHCOMPACT)
8825 cum->byref = shcompact_byref (cum, mode, type, named);
8826 return cum->byref != 0;
8829 return false;
8832 static bool
8833 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8834 const_tree type, bool named ATTRIBUTE_UNUSED)
8836 /* ??? How can it possibly be correct to return true only on the
8837 caller side of the equation? Is there someplace else in the
8838 sh backend that's magically producing the copies? */
8839 return (get_cumulative_args (cum)->outgoing
8840 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8841 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8844 /* Round a register number up to a proper boundary for an arg of mode
8845 MODE.
8846 The SH doesn't care about double alignment, so we only
8847 round doubles to even regs when explicitly asked to. */
8848 static int
8849 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8851 /* FIXME: This used to be a macro and has been copy-pasted into this
8852 function as is. Make this more readable. */
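/* In other words: when the argument's units are wider than a word and
   double-word alignment is requested (TARGET_ALIGN_DOUBLE, or a
   DFmode/DCmode argument that still fits in the FP argument registers
   on a double-precision FPU target), round the current argument
   register count up to an even number; otherwise return it unchanged.  */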
8853 return
8854 (((TARGET_ALIGN_DOUBLE
8855 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
8856 && (mode == DFmode || mode == DCmode)
8857 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
8858 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
8859 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
8860 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
8861 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
8864 /* Return true if an arg of the specified mode should be passed in a register
8865 or false otherwise. */
8866 static bool
8867 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
8868 const_tree type)
8870 /* FIXME: This used to be a macro and has been copy-pasted into this
8871 function as is. Make this more readable. */
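/* Roughly: the argument is passed in registers when its type does not
   force it to memory (TREE_ADDRESSABLE, or an aggregate or wide float
   without FPU under the Renesas ABI), cum.force_mem is not set, and,
   on SH2E, the value fits completely within the remaining argument
   registers of its class (on other targets it only has to start in
   one).  */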
8872 return
8873 ((type == 0
8874 || (! TREE_ADDRESSABLE (type)
8875 && (! (TARGET_HITACHI || cum.renesas_abi)
8876 || ! (AGGREGATE_TYPE_P (type)
8877 || (!TARGET_FPU_ANY
8878 && (GET_MODE_CLASS (mode) == MODE_FLOAT
8879 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
8880 && ! cum.force_mem
8881 && (TARGET_SH2E
8882 ? ((mode) == BLKmode
8883 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
8884 + int_size_in_bytes (type))
8885 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
8886 : ((sh_round_reg (cum, mode)
8887 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
8888 <= NPARM_REGS (mode)))
8889 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
8892 static int
8893 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8894 tree type, bool named ATTRIBUTE_UNUSED)
8896 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8897 int words = 0;
8899 if (!TARGET_SH5
8900 && sh_pass_in_reg_p (*cum, mode, type)
8901 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8902 && (sh_round_reg (*cum, mode)
8903 + (mode != BLKmode
8904 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8905 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8906 > NPARM_REGS (mode)))
8907 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8909 else if (!TARGET_SHCOMPACT
8910 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8911 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8913 return words * UNITS_PER_WORD;
8917 /* Define where to put the arguments to a function.
8918 Value is zero to push the argument on the stack,
8919 or a hard register in which to store the argument.
8921 MODE is the argument's machine mode.
8922 TYPE is the data type of the argument (as a tree).
8923 This is null for libcalls where that information may
8924 not be available.
8925 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8926 the preceding args and about the function being called.
8927 NAMED is nonzero if this argument is a named parameter
8928 (otherwise it is an extra parameter matching an ellipsis).
8930 On SH the first args are normally in registers
8931 and the rest are pushed. Any arg that starts within the first
8932 NPARM_REGS words is at least partially passed in a register unless
8933 its data type forbids it. */
8934 static rtx
8935 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8936 const_tree type, bool named)
8938 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8940 if (! TARGET_SH5 && mode == VOIDmode)
8941 return GEN_INT (ca->renesas_abi ? 1 : 0);
8943 if (! TARGET_SH5
8944 && sh_pass_in_reg_p (*ca, mode, type)
8945 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8947 int regno;
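/* Little-endian SH4 passes an SCmode argument as two SFmode halves,
   each placed in the partner register of its pair (regno ^ 1), matching
   the register swizzle applied to individual SFmode arguments below.  */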
8949 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8950 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8952 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8953 gen_rtx_REG (SFmode,
8954 BASE_ARG_REG (mode)
8955 + (sh_round_reg (*ca, mode) ^ 1)),
8956 const0_rtx);
8957 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8958 gen_rtx_REG (SFmode,
8959 BASE_ARG_REG (mode)
8960 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8961 GEN_INT (4));
8962 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8965 /* If the alignment of a DF value causes an SF register to be
8966 skipped, we will use that skipped register for the next SF
8967 value. */
8968 if ((TARGET_HITACHI || ca->renesas_abi)
8969 && ca->free_single_fp_reg
8970 && mode == SFmode)
8971 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8973 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8974 ^ (mode == SFmode && TARGET_SH4
8975 && TARGET_LITTLE_ENDIAN
8976 && ! TARGET_HITACHI && ! ca->renesas_abi);
8977 return gen_rtx_REG (mode, regno);
8981 if (TARGET_SH5)
8983 if (mode == VOIDmode && TARGET_SHCOMPACT)
8984 return GEN_INT (ca->call_cookie);
8986 /* The following test assumes unnamed arguments are promoted to
8987 DFmode. */
8988 if (mode == SFmode && ca->free_single_fp_reg)
8989 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8991 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8992 && (named || ! ca->prototype_p)
8993 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8995 if (! ca->prototype_p && TARGET_SHMEDIA)
8996 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8998 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8999 FIRST_FP_PARM_REG
9000 + ca->arg_count[(int) SH_ARG_FLOAT]);
9003 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9004 && (! TARGET_SHCOMPACT
9005 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9006 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9007 type, named))))
9009 return gen_rtx_REG (mode, (FIRST_PARM_REG
9010 + ca->arg_count[(int) SH_ARG_INT]));
9013 return NULL_RTX;
9016 return NULL_RTX;
9019 /* Update the data in CUM to advance over an argument
9020 of mode MODE and data type TYPE.
9021 (TYPE is null for libcalls where that information may not be
9022 available.) */
9023 static void
9024 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
9025 const_tree type, bool named)
9027 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9029 if (ca->force_mem)
9030 ca->force_mem = 0;
9031 else if (TARGET_SH5)
9033 const_tree type2 = (ca->byref && type
9034 ? TREE_TYPE (type)
9035 : type);
9036 enum machine_mode mode2 = (ca->byref && type
9037 ? TYPE_MODE (type2)
9038 : mode);
9039 int dwords = ((ca->byref
9040 ? ca->byref
9041 : mode2 == BLKmode
9042 ? int_size_in_bytes (type2)
9043 : GET_MODE_SIZE (mode2)) + 7) / 8;
9044 int numregs = MIN (dwords, NPARM_REGS (SImode)
9045 - ca->arg_count[(int) SH_ARG_INT]);
9047 if (numregs)
9049 ca->arg_count[(int) SH_ARG_INT] += numregs;
9050 if (TARGET_SHCOMPACT
9051 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9053 ca->call_cookie
9054 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9055 - numregs, 1);
9056 /* N.B. We want this also for outgoing. */
9057 ca->stack_regs += numregs;
9059 else if (ca->byref)
9061 if (! ca->outgoing)
9062 ca->stack_regs += numregs;
9063 ca->byref_regs += numregs;
9064 ca->byref = 0;
9066 ca->call_cookie
9067 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9068 - numregs, 2);
9069 while (--numregs);
9070 ca->call_cookie
9071 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9072 - 1, 1);
9074 else if (dwords > numregs)
9076 int pushregs = numregs;
9078 if (TARGET_SHCOMPACT)
9079 ca->stack_regs += numregs;
9080 while (pushregs < NPARM_REGS (SImode) - 1
9081 && (CALL_COOKIE_INT_REG_GET
9082 (ca->call_cookie,
9083 NPARM_REGS (SImode) - pushregs)
9084 == 1))
9086 ca->call_cookie
9087 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9088 - pushregs, 1);
9089 pushregs++;
9091 if (numregs == NPARM_REGS (SImode))
9092 ca->call_cookie
9093 |= CALL_COOKIE_INT_REG (0, 1)
9094 | CALL_COOKIE_STACKSEQ (numregs - 1);
9095 else
9096 ca->call_cookie
9097 |= CALL_COOKIE_STACKSEQ (numregs);
9100 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9101 && (named || ! ca->prototype_p))
9103 if (mode2 == SFmode && ca->free_single_fp_reg)
9104 ca->free_single_fp_reg = 0;
9105 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9106 < NPARM_REGS (SFmode))
9108 int numfpregs
9109 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9110 NPARM_REGS (SFmode)
9111 - ca->arg_count[(int) SH_ARG_FLOAT]);
9113 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9115 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9117 if (ca->outgoing && numregs > 0)
9120 ca->call_cookie
9121 |= (CALL_COOKIE_INT_REG
9122 (ca->arg_count[(int) SH_ARG_INT]
9123 - numregs + ((numfpregs - 2) / 2),
9124 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9125 - numfpregs) / 2));
9127 while (numfpregs -= 2);
9129 else if (mode2 == SFmode && (named)
9130 && (ca->arg_count[(int) SH_ARG_FLOAT]
9131 < NPARM_REGS (SFmode)))
9132 ca->free_single_fp_reg
9133 = FIRST_FP_PARM_REG - numfpregs
9134 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9137 return;
9140 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9142 /* Note that we've used the skipped register. */
9143 if (mode == SFmode && ca->free_single_fp_reg)
9145 ca->free_single_fp_reg = 0;
9146 return;
9148 /* When we have a DF after an SF, there's an SF register that gets
9149 skipped in order to align the DF value. We note this skipped
9150 register, because the next SF value will use it, and not the
9151 SF that follows the DF. */
9152 if (mode == DFmode
9153 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9155 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9156 + BASE_ARG_REG (mode));
9160 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9161 || sh_pass_in_reg_p (*ca, mode, type))
9162 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9163 = (sh_round_reg (*ca, mode)
9164 + (mode == BLKmode
9165 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9166 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9169 /* The Renesas calling convention doesn't quite fit into this scheme since
9170 the address is passed like an invisible argument, but one that is always
9171 passed in memory. */
9172 static rtx
9173 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9175 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9176 return NULL_RTX;
9177 return gen_rtx_REG (Pmode, 2);
9180 /* Worker function for TARGET_FUNCTION_VALUE.
9182 For the SH, this is like LIBCALL_VALUE, except that we must change the
9183 mode like PROMOTE_MODE does.
9184 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9185 tested here has to be kept in sync with the one in
9186 explow.c:promote_mode. */
9187 static rtx
9188 sh_function_value (const_tree valtype,
9189 const_tree fn_decl_or_type,
9190 bool outgoing ATTRIBUTE_UNUSED)
9192 if (fn_decl_or_type
9193 && !DECL_P (fn_decl_or_type))
9194 fn_decl_or_type = NULL;
9196 return gen_rtx_REG (
9197 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9198 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9199 && (TREE_CODE (valtype) == INTEGER_TYPE
9200 || TREE_CODE (valtype) == ENUMERAL_TYPE
9201 || TREE_CODE (valtype) == BOOLEAN_TYPE
9202 || TREE_CODE (valtype) == REAL_TYPE
9203 || TREE_CODE (valtype) == OFFSET_TYPE))
9204 && sh_promote_prototypes (fn_decl_or_type)
9205 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9206 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9209 /* Worker function for TARGET_LIBCALL_VALUE. */
9210 static rtx
9211 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9213 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9216 /* Return true if N is a possible register number of function value. */
9217 static bool
9218 sh_function_value_regno_p (const unsigned int regno)
9220 return ((regno) == FIRST_RET_REG
9221 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9222 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9225 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9226 static bool
9227 sh_return_in_memory (const_tree type, const_tree fndecl)
9229 if (TARGET_SH5)
9231 if (TYPE_MODE (type) == BLKmode)
9232 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9233 else
9234 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9236 else
9238 return (TYPE_MODE (type) == BLKmode
9239 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9240 && TREE_CODE (type) == RECORD_TYPE));
9244 /* We actually emit the code in sh_expand_prologue. We used to use
9245 a static variable to flag that we need to emit this code, but that
9246 doesn't work when inlining, when functions are deferred and then emitted
9247 later. Fortunately, we already have two flags that are part of struct
9248 function that tell if a function uses varargs or stdarg. */
9249 static void
9250 sh_setup_incoming_varargs (cumulative_args_t ca,
9251 enum machine_mode mode,
9252 tree type,
9253 int *pretend_arg_size,
9254 int second_time ATTRIBUTE_UNUSED)
9256 gcc_assert (cfun->stdarg);
9257 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9259 int named_parm_regs, anon_parm_regs;
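/* Count how many argument registers the named parameters consume; the
   remaining ones hold anonymous arguments, and we request that much
   pretend stack space so va_arg can find them.  */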
9261 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9262 + (mode == BLKmode
9263 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9264 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9265 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9266 if (anon_parm_regs > 0)
9267 *pretend_arg_size = anon_parm_regs * 4;
9271 static bool
9272 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9274 return TARGET_SH5;
9277 static bool
9278 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9280 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9282 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9286 /* Define the offset between two registers, one to be eliminated, and
9287 the other its replacement, at the start of a routine. */
9289 initial_elimination_offset (int from, int to)
9291 int regs_saved;
9292 int regs_saved_rounding = 0;
9293 int total_saved_regs_space;
9294 int total_auto_space;
9295 int save_flags = target_flags;
9296 int copy_flags;
9297 HARD_REG_SET live_regs_mask;
9299 shmedia_space_reserved_for_target_registers = false;
9300 regs_saved = calc_live_regs (&live_regs_mask);
9301 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9303 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9305 shmedia_space_reserved_for_target_registers = true;
9306 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9309 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9310 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9311 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9313 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9314 copy_flags = target_flags;
9315 target_flags = save_flags;
9317 total_saved_regs_space = regs_saved + regs_saved_rounding;
9319 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9320 return total_saved_regs_space + total_auto_space
9321 + crtl->args.info.byref_regs * 8;
9323 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9324 return total_saved_regs_space + total_auto_space
9325 + crtl->args.info.byref_regs * 8;
9327 /* Initial gap between fp and sp is 0. */
9328 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9329 return 0;
9331 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9332 return rounded_frame_size (0);
9334 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9335 return rounded_frame_size (0);
9337 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9338 && (to == HARD_FRAME_POINTER_REGNUM
9339 || to == STACK_POINTER_REGNUM));
9340 if (TARGET_SH5)
9342 int n = total_saved_regs_space;
9343 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9344 save_schedule schedule;
9345 save_entry *entry;
9347 n += total_auto_space;
9349 /* If it wasn't saved, there's not much we can do. */
9350 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9351 return n;
9353 target_flags = copy_flags;
9355 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9356 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9357 if (entry->reg == pr_reg)
9359 target_flags = save_flags;
9360 return entry->offset;
9362 gcc_unreachable ();
9364 else
9365 return total_auto_space;
9368 /* Parse the -mfixed-range= option string. */
9369 void
9370 sh_fix_range (const char *const_str)
9372 int i, first, last;
9373 char *str, *dash, *comma;
9375 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9376 REG2 are either register names or register numbers. The effect
9377 of this option is to mark the registers in the range from REG1 to
9378 REG2 as ``fixed'' so they won't be used by the compiler. */
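/* A minimal usage sketch (the register names here are only placeholders):
     -mfixed-range=r8-r10,r12-r13
   marks r8..r10 and r12..r13 as fixed, so the compiler will not allocate
   them.  */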
9380 i = strlen (const_str);
9381 str = (char *) alloca (i + 1);
9382 memcpy (str, const_str, i + 1);
9384 while (1)
9386 dash = strchr (str, '-');
9387 if (!dash)
9389 warning (0, "value of -mfixed-range must have form REG1-REG2");
9390 return;
9392 *dash = '\0';
9393 comma = strchr (dash + 1, ',');
9394 if (comma)
9395 *comma = '\0';
9397 first = decode_reg_name (str);
9398 if (first < 0)
9400 warning (0, "unknown register name: %s", str);
9401 return;
9404 last = decode_reg_name (dash + 1);
9405 if (last < 0)
9407 warning (0, "unknown register name: %s", dash + 1);
9408 return;
9411 *dash = '-';
9413 if (first > last)
9415 warning (0, "%s-%s is an empty range", str, dash + 1);
9416 return;
9419 for (i = first; i <= last; ++i)
9420 fixed_regs[i] = call_used_regs[i] = 1;
9422 if (!comma)
9423 break;
9425 *comma = ',';
9426 str = comma + 1;
9430 /* Insert any deferred function attributes from earlier pragmas. */
9431 static void
9432 sh_insert_attributes (tree node, tree *attributes)
9434 tree attrs;
9436 if (TREE_CODE (node) != FUNCTION_DECL)
9437 return;
9439 /* We are only interested in declarations. */
9440 if (!DECL_P (node))
9441 return;
9443 /* Append the attributes to the deferred attributes. */
9444 *sh_deferred_function_attributes_tail = *attributes;
9445 attrs = sh_deferred_function_attributes;
9446 if (!attrs)
9447 return;
9449 /* Some attributes imply or require the interrupt attribute. */
9450 if (!lookup_attribute ("interrupt_handler", attrs)
9451 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9453 /* If we have a trapa_handler, but no interrupt_handler attribute,
9454 insert an interrupt_handler attribute. */
9455 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9456 /* We can't use sh_pr_interrupt here because that's not in the
9457 java frontend. */
9458 attrs
9459 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9460 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9461 if the interrupt attribute is missing, we ignore the attribute
9462 and warn. */
9463 else if (lookup_attribute ("sp_switch", attrs)
9464 || lookup_attribute ("trap_exit", attrs)
9465 || lookup_attribute ("nosave_low_regs", attrs)
9466 || lookup_attribute ("resbank", attrs))
9468 tree *tail;
9470 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9472 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9473 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9474 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9475 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9476 warning (OPT_Wattributes,
9477 "%qE attribute only applies to interrupt functions",
9478 TREE_PURPOSE (attrs));
9479 else
9481 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9482 NULL_TREE);
9483 tail = &TREE_CHAIN (*tail);
9486 attrs = *attributes;
9490 /* Install the processed list. */
9491 *attributes = attrs;
9493 /* Clear deferred attributes. */
9494 sh_deferred_function_attributes = NULL_TREE;
9495 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9497 return;
9500 /*------------------------------------------------------------------------------
9501 Target specific attributes
9502 Supported attributes are:
9504 * interrupt_handler
9505 Specifies this function is an interrupt handler.
9507 * trapa_handler
9508 Like interrupt_handler, but don't save all registers.
9510 * sp_switch
9511 Specifies an alternate stack for an interrupt handler to run on.
9513 * trap_exit
9514 Use a trapa to exit an interrupt function instead of rte.
9516 * nosave_low_regs
9517 Don't save r0..r7 in an interrupt handler function.
9518 This is useful on SH3* and SH4*, which have a separate set of low
9519 regs for user and privileged modes.
9520 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9521 those that run with interrupts disabled and thus can't be
9522 interrupted themselves).
9524 * renesas
9525 Use Renesas calling/layout conventions (functions and structures).
9527 * resbank
9528 In case of an interrupt handler function, use a register bank to
9529 save registers R0-R14, MACH, MACL, GBR and PR.
9530 This is available only on SH2A targets.
9532 * function_vector
9533 Declares a function to be called using the TBR relative addressing
9534 mode. Takes an argument that specifies the slot number in the table
9535 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
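/* Informal usage sketch; the function names, the stack variable and the
   numeric arguments below are arbitrary examples, not taken from real code:

     void __attribute__ ((interrupt_handler)) isr (void);
     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"))) isr2 (void);
     void __attribute__ ((interrupt_handler, trap_exit (4))) isr3 (void);
     void __attribute__ ((interrupt_handler, resbank)) isr4 (void);      (SH2A)
     void __attribute__ ((function_vector (18))) tbr_func (void);        (SH2A)
     void __attribute__ ((renesas)) rfunc (int a, int b);  */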
9538 /* Handle a 'resbank' attribute. */
9539 static tree
9540 sh_handle_resbank_handler_attribute (tree * node, tree name,
9541 tree args ATTRIBUTE_UNUSED,
9542 int flags ATTRIBUTE_UNUSED,
9543 bool * no_add_attrs)
9545 if (!TARGET_SH2A)
9547 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9548 name);
9549 *no_add_attrs = true;
9551 if (TREE_CODE (*node) != FUNCTION_DECL)
9553 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9554 name);
9555 *no_add_attrs = true;
9558 return NULL_TREE;
9561 /* Handle an "interrupt_handler" attribute; arguments as in
9562 struct attribute_spec.handler. */
9563 static tree
9564 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9565 tree args ATTRIBUTE_UNUSED,
9566 int flags ATTRIBUTE_UNUSED,
9567 bool *no_add_attrs)
9569 if (TREE_CODE (*node) != FUNCTION_DECL)
9571 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9572 name);
9573 *no_add_attrs = true;
9575 else if (TARGET_SHCOMPACT)
9577 error ("attribute interrupt_handler is not compatible with -m5-compact");
9578 *no_add_attrs = true;
9581 return NULL_TREE;
9584 /* Handle a 'function_vector' attribute; arguments as in
9585 struct attribute_spec.handler. */
9586 static tree
9587 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9588 tree args ATTRIBUTE_UNUSED,
9589 int flags ATTRIBUTE_UNUSED,
9590 bool * no_add_attrs)
9592 if (!TARGET_SH2A)
9594 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9595 name);
9596 *no_add_attrs = true;
9598 else if (TREE_CODE (*node) != FUNCTION_DECL)
9600 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9601 name);
9602 *no_add_attrs = true;
9604 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9606 /* The argument must be a constant integer. */
9607 warning (OPT_Wattributes,
9608 "%qE attribute argument not an integer constant",
9609 name);
9610 *no_add_attrs = true;
9612 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9614 /* The argument value must be between 0 and 255. */
9615 warning (OPT_Wattributes,
9616 "%qE attribute argument should be between 0 to 255",
9617 name);
9618 *no_add_attrs = true;
9620 return NULL_TREE;
9623 /* Returns true if the function referenced by the symbol X has been
9624 assigned the attribute 'function_vector'. */
9625 bool
9626 sh2a_is_function_vector_call (rtx x)
9628 if (GET_CODE (x) == SYMBOL_REF
9629 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9631 tree tr = SYMBOL_REF_DECL (x);
9633 if (sh2a_function_vector_p (tr))
9634 return true;
9637 return false;
9640 /* Returns the function vector number, if the attribute
9641 'function_vector' is assigned, otherwise returns zero. */
9643 sh2a_get_function_vector_number (rtx x)
9645 int num;
9646 tree list, t;
9648 if ((GET_CODE (x) == SYMBOL_REF)
9649 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9651 t = SYMBOL_REF_DECL (x);
9653 if (TREE_CODE (t) != FUNCTION_DECL)
9654 return 0;
9656 list = SH_ATTRIBUTES (t);
9657 while (list)
9659 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9661 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9662 return num;
9665 list = TREE_CHAIN (list);
9668 return 0;
9670 else
9671 return 0;
9674 /* Handle an "sp_switch" attribute; arguments as in
9675 struct attribute_spec.handler. */
9676 static tree
9677 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9678 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9680 if (TREE_CODE (*node) != FUNCTION_DECL)
9682 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9683 name);
9684 *no_add_attrs = true;
9686 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9688 /* The argument must be a constant string. */
9689 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9690 name);
9691 *no_add_attrs = true;
9694 return NULL_TREE;
9697 /* Handle a "trap_exit" attribute; arguments as in
9698 struct attribute_spec.handler. */
9699 static tree
9700 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9701 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9703 if (TREE_CODE (*node) != FUNCTION_DECL)
9705 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9706 name);
9707 *no_add_attrs = true;
9709 /* The argument specifies a trap number to be used in a trapa instruction
9710 at function exit (instead of an rte instruction). */
9711 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9713 /* The argument must be a constant integer. */
9714 warning (OPT_Wattributes, "%qE attribute argument not an "
9715 "integer constant", name);
9716 *no_add_attrs = true;
9719 return NULL_TREE;
9722 static tree
9723 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9724 tree name ATTRIBUTE_UNUSED,
9725 tree args ATTRIBUTE_UNUSED,
9726 int flags ATTRIBUTE_UNUSED,
9727 bool *no_add_attrs ATTRIBUTE_UNUSED)
9729 return NULL_TREE;
9732 /* True if __attribute__((renesas)) or -mrenesas. */
9733 bool
9734 sh_attr_renesas_p (const_tree td)
9736 if (TARGET_HITACHI)
9737 return true;
9738 if (td == NULL_TREE)
9739 return false;
9740 if (DECL_P (td))
9741 td = TREE_TYPE (td);
9742 if (td == error_mark_node)
9743 return false;
9744 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9745 != NULL_TREE);
9748 /* True if __attribute__((renesas)) or -mrenesas, for the current
9749 function. */
9750 bool
9751 sh_cfun_attr_renesas_p (void)
9753 return sh_attr_renesas_p (current_function_decl);
9756 /* Returns true if the current function has the "interrupt_handler"
9757 attribute set. */
9758 bool
9759 sh_cfun_interrupt_handler_p (void)
9761 return (lookup_attribute ("interrupt_handler",
9762 DECL_ATTRIBUTES (current_function_decl))
9763 != NULL_TREE);
9766 /* Returns true if FUNC has been assigned the attribute
9767 "function_vector". */
9768 bool
9769 sh2a_function_vector_p (tree func)
9771 tree list;
9772 if (TREE_CODE (func) != FUNCTION_DECL)
9773 return false;
9775 list = SH_ATTRIBUTES (func);
9776 while (list)
9778 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9779 return true;
9781 list = TREE_CHAIN (list);
9783 return false;
9786 /* Returns true if the current function has the "resbank" attribute set. */
9787 bool
9788 sh_cfun_resbank_handler_p (void)
9790 return ((lookup_attribute ("resbank",
9791 DECL_ATTRIBUTES (current_function_decl))
9792 != NULL_TREE)
9793 && (lookup_attribute ("interrupt_handler",
9794 DECL_ATTRIBUTES (current_function_decl))
9795 != NULL_TREE) && TARGET_SH2A);
9798 /* Returns true if the current function has a "trap_exit" attribute set. */
9799 bool
9800 sh_cfun_trap_exit_p (void)
9802 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9803 != NULL_TREE;
9806 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9807 static const char *
9808 sh_check_pch_target_flags (int old_flags)
9810 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9811 | MASK_SH_E | MASK_HARD_SH4
9812 | MASK_FPU_SINGLE | MASK_SH4))
9813 return _("created and used with different architectures / ABIs");
9814 if ((old_flags ^ target_flags) & MASK_HITACHI)
9815 return _("created and used with different ABIs");
9816 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9817 return _("created and used with different endianness");
9818 return NULL;
9821 /* Predicates used by the templates. */
9823 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9824 Used only in general_movsrc_operand. */
9825 bool
9826 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9828 switch (REGNO (op))
9830 case PR_REG:
9831 case MACL_REG:
9832 case MACH_REG:
9833 return true;
9835 return false;
9838 /* Returns true if OP is a floating point value with value 0.0. */
9839 bool
9840 fp_zero_operand (rtx op)
9842 REAL_VALUE_TYPE r;
9844 if (GET_MODE (op) != SFmode)
9845 return false;
9847 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9848 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9851 /* Returns true if OP is a floating point value with value 1.0. */
9852 bool
9853 fp_one_operand (rtx op)
9855 REAL_VALUE_TYPE r;
9857 if (GET_MODE (op) != SFmode)
9858 return false;
9860 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9861 return REAL_VALUES_EQUAL (r, dconst1);
9864 /* In general mode switching is used. If we are
9865 compiling without -mfmovd, movsf_ie isn't taken into account for
9866 mode switching. We could check in machine_dependent_reorg for
9867 cases where we know we are in single precision mode, but there is
9868 no interface to find that out during reload, so we must avoid
9869 choosing an fldi alternative during reload and thus failing to
9870 allocate a scratch register for the constant loading. */
9871 bool
9872 fldi_ok (void)
9874 return true;
9877 /* Return the TLS type for TLS symbols. */
9878 enum tls_model
9879 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9881 if (GET_CODE (op) != SYMBOL_REF)
9882 return TLS_MODEL_NONE;
9883 return SYMBOL_REF_TLS_MODEL (op);
9886 /* Return the destination address of a branch. */
9887 static int
9888 branch_dest (rtx branch)
9890 rtx dest = SET_SRC (PATTERN (branch));
9891 int dest_uid;
9893 if (GET_CODE (dest) == IF_THEN_ELSE)
9894 dest = XEXP (dest, 1);
9895 dest = XEXP (dest, 0);
9896 dest_uid = INSN_UID (dest);
9897 return INSN_ADDRESSES (dest_uid);
9900 /* Return nonzero if REG is not used after INSN.
9901 We assume REG is a reload reg, and therefore does
9902 not live past labels. It may live past calls or jumps though. */
9903 bool
9904 reg_unused_after (rtx reg, rtx insn)
9906 enum rtx_code code;
9907 rtx set;
9909 /* If the reg is set by this instruction, then it is safe for our
9910 case. Disregard the case where this is a store to memory, since
9911 we are checking a register used in the store address. */
9912 set = single_set (insn);
9913 if (set && !MEM_P (SET_DEST (set))
9914 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9915 return true;
9917 while ((insn = NEXT_INSN (insn)))
9919 rtx set;
9920 if (!INSN_P (insn))
9921 continue;
9923 code = GET_CODE (insn);
9925 #if 0
9926 /* If this is a label that existed before reload, then the register
9927 is dead here. However, if this is a label added by reorg, then
9928 the register may still be live here. We can't tell the difference,
9929 so we just ignore labels completely. */
9930 if (code == CODE_LABEL)
9931 return 1;
9932 /* else */
9933 #endif
9935 if (code == JUMP_INSN)
9936 return false;
9938 /* If this is a sequence, we must handle them all at once.
9939 We could have for instance a call that sets the target register,
9940 and an insn in a delay slot that uses the register. In this case,
9941 we must return 0. */
9942 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9944 int i;
9945 int retval = 0;
9947 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9949 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9950 rtx set = single_set (this_insn);
9952 if (CALL_P (this_insn))
9953 code = CALL_INSN;
9954 else if (JUMP_P (this_insn))
9956 if (INSN_ANNULLED_BRANCH_P (this_insn))
9957 return false;
9958 code = JUMP_INSN;
9961 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9962 return false;
9963 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9965 if (!MEM_P (SET_DEST (set)))
9966 retval = true;
9967 else
9968 return false;
9970 if (set == NULL_RTX
9971 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9972 return false;
9974 if (retval == 1)
9975 return true;
9976 else if (code == JUMP_INSN)
9977 return false;
9980 set = single_set (insn);
9981 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9982 return false;
9983 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9984 return !MEM_P (SET_DEST (set));
9985 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9986 return false;
9988 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9989 return true;
9991 return true;
9994 #include "ggc.h"
9996 static GTY(()) rtx t_reg_rtx;
9998 get_t_reg_rtx (void)
10000 if (! t_reg_rtx)
10001 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10002 return t_reg_rtx;
10005 static GTY(()) rtx fpscr_rtx;
10007 get_fpscr_rtx (void)
10009 if (! fpscr_rtx)
10011 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
10012 REG_USERVAR_P (fpscr_rtx) = 1;
10013 mark_user_reg (fpscr_rtx);
10015 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
10016 mark_user_reg (fpscr_rtx);
10017 return fpscr_rtx;
10020 static GTY(()) tree fpscr_values;
10022 static void
10023 emit_fpu_switch (rtx scratch, int index)
10025 rtx dst, src;
10027 if (fpscr_values == NULL)
10029 tree t;
10031 t = build_index_type (integer_one_node);
10032 t = build_array_type (integer_type_node, t);
10033 t = build_decl (BUILTINS_LOCATION,
10034 VAR_DECL, get_identifier ("__fpscr_values"), t);
10035 DECL_ARTIFICIAL (t) = 1;
10036 DECL_IGNORED_P (t) = 1;
10037 DECL_EXTERNAL (t) = 1;
10038 TREE_STATIC (t) = 1;
10039 TREE_PUBLIC (t) = 1;
10040 TREE_USED (t) = 1;
10042 fpscr_values = t;
10045 src = DECL_RTL (fpscr_values);
10046 if (!can_create_pseudo_p ())
10048 emit_move_insn (scratch, XEXP (src, 0));
10049 if (index != 0)
10050 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10051 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
10053 else
10054 src = adjust_address (src, PSImode, index * 4);
10056 dst = get_fpscr_rtx ();
10057 emit_move_insn (dst, src);
10060 void
10061 emit_sf_insn (rtx pat)
10063 emit_insn (pat);
10066 void
10067 emit_df_insn (rtx pat)
10069 emit_insn (pat);
10072 void
10073 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10075 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10078 void
10079 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10081 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
10082 get_fpscr_rtx ()));
10085 void
10086 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10088 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10091 void
10092 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10094 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
10095 get_fpscr_rtx ()));
10098 static rtx get_free_reg (HARD_REG_SET);
10100 /* This function returns a register to use for loading the address from
10101 which to load the fpscr. Currently it always returns r1 or r7, but when
10102 we are able to use pseudo registers after combine, or have a better
10103 mechanism for choosing a register, it should be done here. */
10104 /* REGS_LIVE is the liveness information for the point for which we
10105 need this allocation. In some bare-bones exit blocks, r1 is live at the
10106 start. We can even have all of r0..r3 being live:
10107 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10108 The INSN before which new insns are placed will clobber the register
10109 we return. If a basic block consists only of setting the return value
10110 register to a pseudo and using that register, the return value is not
10111 live before or after this block, yet we'll insert our insns right in
10112 the middle. */
10113 static rtx
10114 get_free_reg (HARD_REG_SET regs_live)
10116 if (! TEST_HARD_REG_BIT (regs_live, 1))
10117 return gen_rtx_REG (Pmode, 1);
10119 /* Hard reg 1 is live; since this is a small register classes target,
10120 there shouldn't be anything but a jump before the function end. */
10121 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10122 return gen_rtx_REG (Pmode, 7);
10125 /* This function will set the fpscr from memory.
10126 MODE is the mode we are setting it to. */
10127 void
10128 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10130 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10131 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10132 rtx addr_reg;
10134 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10135 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10138 /* Is the given character a logical line separator for the assembler? */
10139 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10140 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10141 #endif
10143 static bool
10144 sequence_insn_p (rtx insn)
10146 rtx prev, next;
10148 prev = PREV_INSN (insn);
10149 if (prev == NULL)
10150 return false;
10152 next = NEXT_INSN (prev);
10153 if (next == NULL)
10154 return false;
10156 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10160 sh_insn_length_adjustment (rtx insn)
10162 /* Instructions with unfilled delay slots take up an extra two bytes for
10163 the nop in the delay slot. */
10164 if (((NONJUMP_INSN_P (insn)
10165 && GET_CODE (PATTERN (insn)) != USE
10166 && GET_CODE (PATTERN (insn)) != CLOBBER)
10167 || CALL_P (insn) || JUMP_P (insn))
10168 && ! sequence_insn_p (insn)
10169 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10170 return 2;
10172 /* SH2e has a bug that prevents the use of annulled branches, so if
10173 the delay slot is not filled, we'll have to put a NOP in it. */
10174 if (sh_cpu_attr == CPU_SH2E
10175 && JUMP_P (insn)
10176 && get_attr_type (insn) == TYPE_CBRANCH
10177 && ! sequence_insn_p (insn))
10178 return 2;
10180 /* sh-dsp parallel processing insns take four bytes instead of two. */
10182 if (NONJUMP_INSN_P (insn))
10184 int sum = 0;
10185 rtx body = PATTERN (insn);
10186 const char *templ;
10187 char c;
10188 bool maybe_label = true;
10190 if (GET_CODE (body) == ASM_INPUT)
10191 templ = XSTR (body, 0);
10192 else if (asm_noperands (body) >= 0)
10193 templ
10194 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10195 else
10196 return 0;
10199 int ppi_adjust = 0;
10202 c = *templ++;
10203 while (c == ' ' || c == '\t');
10204 /* all sh-dsp parallel-processing insns start with p.
10205 The only non-ppi sh insn starting with p is pref.
10206 The only ppi starting with pr is prnd. */
10207 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10208 ppi_adjust = 2;
10209 /* The repeat pseudo-insn expands to three insns, a total of
10210 six bytes in size. */
10211 else if ((c == 'r' || c == 'R')
10212 && ! strncasecmp ("epeat", templ, 5))
10213 ppi_adjust = 4;
10214 while (c && c != '\n'
10215 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10217 /* If this is a label, it is obviously not a ppi insn. */
10218 if (c == ':' && maybe_label)
10220 ppi_adjust = 0;
10221 break;
10223 else if (c == '\'' || c == '"')
10224 maybe_label = false;
10225 c = *templ++;
10227 sum += ppi_adjust;
10228 maybe_label = c != ':';
10230 while (c);
10231 return sum;
10233 return 0;
10236 /* Return TRUE for a valid displacement for the REG+disp addressing
10237 with MODE. */
10238 bool
10239 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10240 bool allow_zero)
10242 if (! CONST_INT_P (op))
10243 return false;
10245 if (TARGET_SHMEDIA)
10247 int size;
10249 /* Check if this is the address of an unaligned load / store. */
10250 if (mode == VOIDmode)
10251 return satisfies_constraint_I06 (op);
10253 size = GET_MODE_SIZE (mode);
10254 return (!(INTVAL (op) & (size - 1))
10255 && INTVAL (op) >= -512 * size
10256 && INTVAL (op) < 512 * size);
10258 else
10260 const HOST_WIDE_INT offset = INTVAL (op);
10261 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10262 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10264 /* If the mode does not support any displacement always return false.
10265 Even though an index of '0' is actually always valid, it will cause
10266 troubles when e.g. a DFmode move is split into two SFmode moves,
10267 where one SFmode move will have index '0' and the other move will
10268 have index '4'. */
10269 if (!allow_zero && max_disp < 1)
10270 return false;
10272 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10276 /* Recognize an RTL expression that is a valid memory address for
10277 an instruction.
10278 The MODE argument is the machine mode for the MEM expression
10279 that wants to use this address.
10280 Allow REG
10281 REG+disp
10282 REG+r0
10283 REG++
10284 --REG
10286 GBR+disp */
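/* For illustration only (the register numbers are arbitrary), a few of these
   forms as they appear in RTL:
     (reg:SI 4 r4)                              REG
     (plus:SI (reg:SI 4 r4) (const_int 20))     REG+disp
     (plus:SI (reg:SI 4 r4) (reg:SI 0 r0))      REG+r0
     (post_inc:SI (reg:SI 4 r4))                REG++  */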
10287 static bool
10288 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10290 if (REG_P (x) && REGNO (x) == GBR_REG)
10291 return true;
10293 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10294 return true;
10295 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10296 && ! TARGET_SHMEDIA
10297 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10298 return true;
10299 else if (GET_CODE (x) == PLUS
10300 && (mode != PSImode || reload_completed))
10302 rtx xop0 = XEXP (x, 0);
10303 rtx xop1 = XEXP (x, 1);
10305 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10306 return gbr_displacement (xop1, mode);
10308 if (GET_MODE_SIZE (mode) <= 8
10309 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10310 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10311 return true;
10313 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10314 || ((xop0 == stack_pointer_rtx
10315 || xop0 == hard_frame_pointer_rtx)
10316 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10317 || ((xop1 == stack_pointer_rtx
10318 || xop1 == hard_frame_pointer_rtx)
10319 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10320 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10321 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10322 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10323 && TARGET_FMOVD && mode == DFmode)))
10325 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10326 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10327 return true;
10328 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10329 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10330 return true;
10334 return false;
10337 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10338 isn't protected by a PIC unspec. */
10339 bool
10340 nonpic_symbol_mentioned_p (rtx x)
10342 const char *fmt;
10343 int i;
10345 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10346 || GET_CODE (x) == PC)
10347 return true;
10349 /* We don't want to look into the possible MEM location of a
10350 CONST_DOUBLE, since we're not going to use it, in general. */
10351 if (GET_CODE (x) == CONST_DOUBLE)
10352 return false;
10354 if (GET_CODE (x) == UNSPEC
10355 && (XINT (x, 1) == UNSPEC_PIC
10356 || XINT (x, 1) == UNSPEC_GOT
10357 || XINT (x, 1) == UNSPEC_GOTOFF
10358 || XINT (x, 1) == UNSPEC_GOTPLT
10359 || XINT (x, 1) == UNSPEC_GOTTPOFF
10360 || XINT (x, 1) == UNSPEC_DTPOFF
10361 || XINT (x, 1) == UNSPEC_TPOFF
10362 || XINT (x, 1) == UNSPEC_PLT
10363 || XINT (x, 1) == UNSPEC_SYMOFF
10364 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10365 return false;
10367 fmt = GET_RTX_FORMAT (GET_CODE (x));
10368 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10370 if (fmt[i] == 'E')
10372 int j;
10373 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10374 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10375 return true;
10377 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10378 return true;
10381 return false;
10384 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10385 @GOTOFF in `reg'. */
10387 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10388 rtx reg)
10390 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10391 return orig;
10393 if (GET_CODE (orig) == LABEL_REF
10394 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10396 if (reg == NULL_RTX)
10397 reg = gen_reg_rtx (Pmode);
10399 emit_insn (gen_symGOTOFF2reg (reg, orig));
10400 return reg;
10402 else if (GET_CODE (orig) == SYMBOL_REF)
10404 if (reg == NULL_RTX)
10405 reg = gen_reg_rtx (Pmode);
10407 emit_insn (gen_symGOT2reg (reg, orig));
10408 return reg;
10410 return orig;
10413 /* Given a (logical) mode size and an offset in bytes, try to find the
10414 appropriate displacement value for a mov insn. On SH the displacements
10415 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10416 15 bytes in QImode. To compensate for this we create a new base address by
10417 adding an adjustment value to it.
10419 If the originally requested offset is greater than 127 we prefer using
10420 values 124..127 over 128..131 to increase opportunities to use the
10421 add #imm, Rn insn.
10423 In some cases it is possible that a requested offset might seem unaligned
10424 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10425 This is compensated by adjusting the base address so that the effective
10426 address of the displacement move insn will be aligned.
10428 This is not the best possible way of rebasing the base address, as it
10429 does not look at other present displacement addressings around it.
10430 In some cases this can create more base address adjustments than would
10431 actually be necessary. */
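/* A worked example of the rebasing described above, assuming SImode with a
   maximum displacement of 60: for a requested offset of 68 the base address
   is adjusted by +64 and the move insn then uses displacement 4, i.e. the
   effective address becomes (base + 64) + 4.  */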
10432 struct disp_adjust
10434 rtx offset_adjust;
10435 rtx mov_disp;
10438 static struct disp_adjust
10439 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10441 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10443 /* Do not try to use SH2A's large displacements here, because this would
10444 effectively disable the small displacement insns. */
10445 const int mode_sz = GET_MODE_SIZE (mode);
10446 const int mov_insn_sz = mov_insn_size (mode, false);
10447 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10448 const int max_disp_next = max_disp + mov_insn_sz;
10449 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10450 HOST_WIDE_INT offset_adjust;
10452 /* In some cases this actually does happen and we must check for it. */
10453 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10454 return res;
10456 /* Keeps the previous behavior for QImode displacement addressing.
10457 This just decides how the offset is re-based. Removing this special
10458 case will result in slightly bigger code on average, but it's not that
10459 bad actually. */
10460 if (mov_insn_sz == 1)
10461 align_modifier = 0;
10463 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10465 if (mode_sz + offset - offset_adjust <= max_disp_next)
10467 res.offset_adjust = GEN_INT (offset_adjust);
10468 res.mov_disp = GEN_INT (offset - offset_adjust);
10471 return res;
10474 /* Try to modify an illegitimate address and make it legitimate.
10475 If we find one, return the new, valid address.
10476 Otherwise, return the original address. */
10477 static rtx
10478 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10480 if (flag_pic)
10481 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10483 if (TARGET_SHMEDIA)
10484 return x;
10486 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10487 || (TARGET_SH2E && mode == SFmode))
10488 return x;
10490 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10491 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10493 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10494 INTVAL (XEXP (x, 1)));
10496 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10498 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10499 adj.offset_adjust, NULL_RTX, 0,
10500 OPTAB_LIB_WIDEN);
10501 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10505 return x;
10508 /* Attempt to replace *p, which is an address that needs reloading, with
10509 a valid memory address for an operand of mode MODE.
10510 Like for sh_legitimize_address, for the SH we try to get a normal form
10511 of the address. That will allow inheritance of the address reloads. */
10512 bool
10513 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10514 int itype)
10516 enum reload_type type = (enum reload_type) itype;
10517 const int mode_sz = GET_MODE_SIZE (mode);
10519 if (TARGET_SHMEDIA)
10520 return false;
10522 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10523 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10524 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10525 && (ALLOW_INDEXED_ADDRESS
10526 || XEXP (*p, 0) == stack_pointer_rtx
10527 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10529 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10530 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10532 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10534 push_reload (*p, NULL_RTX, p, NULL,
10535 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10536 return true;
10539 if (TARGET_SH2E && mode == SFmode)
10541 *p = copy_rtx (*p);
10542 push_reload (*p, NULL_RTX, p, NULL,
10543 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10544 return true;
10547 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10548 moves, because then reload has a problem figuring out the constraint
10549 that the move insn target/source reg must be R0.
10550 Or maybe some handling is wrong in sh_secondary_reload for this
10551 to work properly? */
10552 if ((mode_sz == 4 || mode_sz == 8)
10553 && ! (TARGET_SH4 && mode == DFmode)
10554 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10556 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10557 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10558 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10559 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10560 return true;
10564 /* We must re-recognize what we created before. */
10565 if (GET_CODE (*p) == PLUS
10566 && (mode_sz == 4 || mode_sz == 8)
10567 && GET_CODE (XEXP (*p, 0)) == PLUS
10568 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10569 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10570 && CONST_INT_P (XEXP (*p, 1))
10571 && ! (TARGET_SH2E && mode == SFmode))
10573 /* Because this address is so complex, we know it must have
10574 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10575 it is already unshared, and needs no further unsharing. */
10576 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10577 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10578 return true;
10581 return false;
10584 /* In the name of slightly smaller debug output, and to cater to
10585 general assembler lossage, recognize various UNSPEC sequences
10586 and turn them back into a direct symbol reference. */
10587 static rtx
10588 sh_delegitimize_address (rtx orig_x)
10590 rtx x, y;
10592 orig_x = delegitimize_mem_from_attrs (orig_x);
10594 x = orig_x;
10595 if (MEM_P (x))
10596 x = XEXP (x, 0);
10597 if (GET_CODE (x) == CONST)
10599 y = XEXP (x, 0);
10600 if (GET_CODE (y) == UNSPEC)
10602 if (XINT (y, 1) == UNSPEC_GOT
10603 || XINT (y, 1) == UNSPEC_GOTOFF
10604 || XINT (y, 1) == UNSPEC_SYMOFF)
10605 return XVECEXP (y, 0, 0);
10606 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10608 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10610 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10612 if (GET_CODE (symplt) == UNSPEC
10613 && XINT (symplt, 1) == UNSPEC_PLT)
10614 return XVECEXP (symplt, 0, 0);
10617 else if (TARGET_SHMEDIA
10618 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10619 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10621 rtx offset = XVECEXP (y, 0, 1);
10623 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10624 if (MEM_P (orig_x))
10625 x = replace_equiv_address_nv (orig_x, x);
10626 return x;
10631 return orig_x;
10634 /* Mark the use of a constant in the literal table. If the constant
10635 has multiple labels, make it unique. */
10636 static rtx
10637 mark_constant_pool_use (rtx x)
10639 rtx insn, lab, pattern;
10641 if (x == NULL_RTX)
10642 return x;
10644 switch (GET_CODE (x))
10646 case LABEL_REF:
10647 x = XEXP (x, 0);
10648 case CODE_LABEL:
10649 break;
10650 default:
10651 return x;
10654 /* Get the first label in the list of labels for the same constant
10655 and delete the other labels in the list. */
10656 lab = x;
10657 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10659 if (!LABEL_P (insn)
10660 || LABEL_REFS (insn) != NEXT_INSN (insn))
10661 break;
10662 lab = insn;
10665 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10666 INSN_DELETED_P (insn) = 1;
10668 /* Mark constants in a window. */
10669 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10671 if (!NONJUMP_INSN_P (insn))
10672 continue;
10674 pattern = PATTERN (insn);
10675 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10676 continue;
10678 switch (XINT (pattern, 1))
10680 case UNSPECV_CONST2:
10681 case UNSPECV_CONST4:
10682 case UNSPECV_CONST8:
10683 XVECEXP (pattern, 0, 1) = const1_rtx;
10684 break;
10685 case UNSPECV_WINDOW_END:
10686 if (XVECEXP (pattern, 0, 0) == x)
10687 return lab;
10688 break;
10689 case UNSPECV_CONST_END:
10690 return lab;
10691 default:
10692 break;
10696 return lab;
10699 /* Return true if it's possible to redirect BRANCH1 to the destination
10700 of an unconditional jump BRANCH2. We only want to do this if the
10701 resulting branch will have a short displacement. */
10702 bool
10703 sh_can_redirect_branch (rtx branch1, rtx branch2)
10705 if (flag_expensive_optimizations && simplejump_p (branch2))
10707 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10708 rtx insn;
10709 int distance;
10711 for (distance = 0, insn = NEXT_INSN (branch1);
10712 insn && distance < 256;
10713 insn = PREV_INSN (insn))
10715 if (insn == dest)
10716 return true;
10717 else
10718 distance += get_attr_length (insn);
10720 for (distance = 0, insn = NEXT_INSN (branch1);
10721 insn && distance < 256;
10722 insn = NEXT_INSN (insn))
10724 if (insn == dest)
10725 return true;
10726 else
10727 distance += get_attr_length (insn);
10730 return false;
10733 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10734 bool
10735 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10736 unsigned int new_reg)
10738 /* Interrupt functions can only use registers that have already been
10739 saved by the prologue, even if they would normally be
10740 call-clobbered. */
10741 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10742 return false;
10744 return true;
10747 /* Function to update the integer COST
10748 based on the relationship between INSN that is dependent on
10749 DEP_INSN through the dependence LINK. The default is to make no
10750 adjustment to COST. This can be used for example to specify to
10751 the scheduler that an output- or anti-dependence does not incur
10752 the same cost as a data-dependence. The return value should be
10753 the new value for COST. */
10754 static int
10755 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10757 rtx reg, use_pat;
10759 if (TARGET_SHMEDIA)
10761 /* On SHmedia, if the dependence is an anti-dependence or
10762 output-dependence, there is no cost. */
10763 if (REG_NOTE_KIND (link) != 0)
10765 /* However, dependencies between target register loads and
10766 uses of the register in a subsequent block that are separated
10767 by a conditional branch are not modelled - we have to make do with
10768 the anti-dependency between the target register load and the
10769 conditional branch that ends the current block. */
10770 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10771 && GET_CODE (PATTERN (dep_insn)) == SET
10772 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10773 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10774 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10776 int orig_cost = cost;
10777 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10778 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10779 ? insn : JUMP_LABEL (insn));
10780 /* On the likely path, the branch costs 1, on the unlikely path,
10781 it costs 3. */
10782 cost--;
10784 target = next_active_insn (target);
10785 while (target && ! flow_dependent_p (target, dep_insn)
10786 && --cost > 0);
10787 /* If two branches are executed in immediate succession, with the
10788 first branch properly predicted, this causes a stall at the
10789 second branch, hence we won't need the target for the
10790 second branch for two cycles after the launch of the first
10791 branch. */
10792 if (cost > orig_cost - 2)
10793 cost = orig_cost - 2;
10795 else
10796 cost = 0;
10799 else if (get_attr_is_mac_media (insn)
10800 && get_attr_is_mac_media (dep_insn))
10801 cost = 1;
10803 else if (! reload_completed
10804 && GET_CODE (PATTERN (insn)) == SET
10805 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10806 && GET_CODE (PATTERN (dep_insn)) == SET
10807 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10808 && cost < 4)
10809 cost = 4;
10810 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10811 that is needed at the target. */
10812 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10813 && ! flow_dependent_p (insn, dep_insn))
10814 cost--;
10816 else if (REG_NOTE_KIND (link) == 0)
10818 enum attr_type type;
10819 rtx dep_set;
10821 if (recog_memoized (insn) < 0
10822 || recog_memoized (dep_insn) < 0)
10823 return cost;
10825 dep_set = single_set (dep_insn);
10827 /* The latency that we specify in the scheduling description refers
10828 to the actual output, not to an auto-increment register; for that,
10829 the latency is one. */
10830 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10832 rtx set = single_set (insn);
10834 if (set
10835 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10836 && (!MEM_P (SET_DEST (set))
10837 || !reg_mentioned_p (SET_DEST (dep_set),
10838 XEXP (SET_DEST (set), 0))))
10839 cost = 1;
10841 /* The only input for a call that is timing-critical is the
10842 function's address. */
10843 if (CALL_P (insn))
10845 rtx call = get_call_rtx_from (insn);
10846 if (call
10847 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10848 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10849 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10850 cost -= TARGET_SH4_300 ? 3 : 6;
10852 /* Likewise, the most timing critical input for an sfuncs call
10853 is the function address. However, sfuncs typically start
10854 using their arguments pretty quickly.
10855 Assume a four cycle delay for SH4 before they are needed.
10856 Cached ST40-300 calls are quicker, so assume only a one
10857 cycle delay there.
10858 ??? Maybe we should encode the delays till input registers
10859 are needed by sfuncs into the sfunc call insn. */
10860 /* All sfunc calls are parallels with at least four components.
10861 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10862 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10863 && XVECLEN (PATTERN (insn), 0) >= 4
10864 && (reg = sfunc_uses_reg (insn)))
10866 if (! reg_set_p (reg, dep_insn))
10867 cost -= TARGET_SH4_300 ? 1 : 4;
10869 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10871 enum attr_type dep_type = get_attr_type (dep_insn);
10873 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10874 cost--;
10875 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10876 && (type = get_attr_type (insn)) != TYPE_CALL
10877 && type != TYPE_SFUNC)
10878 cost--;
10879 /* When the preceding instruction loads the shift amount of
10880 the following SHAD/SHLD, the latency of the load is increased
10881 by 1 cycle. */
10882 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10883 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10884 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10885 XEXP (SET_SRC (single_set (insn)),
10886 1)))
10887 cost++;
10888 /* When an LS group instruction with a latency of less than
10889 3 cycles is followed by a double-precision floating-point
10890 instruction, FIPR, or FTRV, the latency of the first
10891 instruction is increased to 3 cycles. */
10892 else if (cost < 3
10893 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10894 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10895 cost = 3;
10896 /* The lsw register of a double-precision computation is ready one
10897 cycle earlier. */
10898 else if (reload_completed
10899 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10900 && (use_pat = single_set (insn))
10901 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10902 SET_SRC (use_pat)))
10903 cost -= 1;
10905 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10906 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10907 cost -= 1;
10909 else if (TARGET_SH4_300)
10911 /* Stores need their input register two cycles later. */
10912 if (dep_set && cost >= 1
10913 && ((type = get_attr_type (insn)) == TYPE_STORE
10914 || type == TYPE_PSTORE
10915 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10917 rtx set = single_set (insn);
10919 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10920 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10922 cost -= 2;
10923 /* But don't reduce the cost below 1 if the address depends
10924 on a side effect of dep_insn. */
10925 if (cost < 1
10926 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10927 cost = 1;
10932 /* An anti-dependence penalty of two applies if the first insn is a double
10933 precision fadd / fsub / fmul. */
10934 else if (!TARGET_SH4_300
10935 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10936 && recog_memoized (dep_insn) >= 0
10937 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10938 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10939 /* A lot of alleged anti-flow dependences are fake,
10940 so check this one is real. */
10941 && flow_dependent_p (dep_insn, insn))
10942 cost = 2;
10944 return cost;
10947 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10948 if DEP_INSN is anti-flow dependent on INSN. */
10949 static bool
10950 flow_dependent_p (rtx insn, rtx dep_insn)
10952 rtx tmp = PATTERN (insn);
10954 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10955 return tmp == NULL_RTX;
10958 /* A helper function for flow_dependent_p called through note_stores. */
10959 static void
10960 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10962 rtx * pinsn = (rtx *) data;
10964 if (*pinsn && reg_referenced_p (x, *pinsn))
10965 *pinsn = NULL_RTX;
10968 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10969 'special function' patterns (type sfunc) that clobber pr, but that
10970 do not look like function calls to leaf_function_p. Hence we must
10971 do this extra check. */
10972 static int
10973 sh_pr_n_sets (void)
10975 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10978 /* Return where to allocate pseudo for a given hard register initial
10979 value. */
10980 static rtx
10981 sh_allocate_initial_value (rtx hard_reg)
10983 rtx x;
10985 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10987 if (crtl->is_leaf
10988 && ! sh_pr_n_sets ()
10989 && ! (TARGET_SHCOMPACT
10990 && ((crtl->args.info.call_cookie
10991 & ~ CALL_COOKIE_RET_TRAMP (1))
10992 || crtl->saves_all_registers)))
10993 x = hard_reg;
10994 else
10995 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10997 else
10998 x = NULL_RTX;
11000 return x;
11003 /* This function returns "2" to indicate dual issue for the SH4
11004 processor. To be used by the DFA pipeline description. */
11005 static int
11006 sh_issue_rate (void)
11008 if (TARGET_SUPERSCALAR)
11009 return 2;
11010 else
11011 return 1;
11014 /* Functions for ready queue reordering for sched1. */
11016 /* Get weight for mode for a set x. */
11017 static short
11018 find_set_regmode_weight (rtx x, enum machine_mode mode)
11020 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11021 return 1;
11022 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11024 if (REG_P (SET_DEST (x)))
11026 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11027 return 1;
11028 else
11029 return 0;
11031 return 1;
11033 return 0;
11036 /* Get regmode weight for insn. */
11037 static short
11038 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
11040 short reg_weight = 0;
11041 rtx x;
11043 /* Increment weight for each register born here. */
11044 x = PATTERN (insn);
11045 reg_weight += find_set_regmode_weight (x, mode);
11046 if (GET_CODE (x) == PARALLEL)
11048 int j;
11049 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11051 x = XVECEXP (PATTERN (insn), 0, j);
11052 reg_weight += find_set_regmode_weight (x, mode);
11055 /* Decrement weight for each register that dies here. */
11056 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11058 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11060 rtx note = XEXP (x, 0);
11061 if (REG_P (note) && GET_MODE (note) == mode)
11062 reg_weight--;
11065 return reg_weight;
11068 /* Calculate regmode weights for all insns of a basic block. */
11069 static void
11070 find_regmode_weight (basic_block b, enum machine_mode mode)
11072 rtx insn, next_tail, head, tail;
11074 get_ebb_head_tail (b, b, &head, &tail);
11075 next_tail = NEXT_INSN (tail);
11077 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11079 /* Handle register life information. */
11080 if (!INSN_P (insn))
11081 continue;
11083 if (mode == SFmode)
11084 INSN_REGMODE_WEIGHT (insn, mode) =
11085 find_insn_regmode_weight (insn, mode)
11086 + 2 * find_insn_regmode_weight (insn, DFmode);
11087 else if (mode == SImode)
11088 INSN_REGMODE_WEIGHT (insn, mode) =
11089 find_insn_regmode_weight (insn, mode)
11090 + 2 * find_insn_regmode_weight (insn, DImode);
11094 /* Comparison function for ready queue sorting. */
11095 static int
11096 rank_for_reorder (const void *x, const void *y)
11098 rtx tmp = *(const rtx *) y;
11099 rtx tmp2 = *(const rtx *) x;
11101 /* The insn in a schedule group should be issued first. */
11102 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11103 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11105 /* If insns are equally good, sort by INSN_LUID (original insn order). This
11106 minimizes instruction movement, thus minimizing sched's effect on
11107 register pressure. */
11108 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11111 /* Resort the array A in which only element at index N may be out of order. */
11112 static void
11113 swap_reorder (rtx *a, int n)
11115 rtx insn = a[n - 1];
11116 int i = n - 2;
11118 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11120 a[i + 1] = a[i];
11121 i -= 1;
11123 a[i + 1] = insn;
11126 /* Sort the ready list by ascending priority. */
11127 static void
11128 ready_reorder (rtx *ready, int nready)
11130 if (nready == 2)
11131 swap_reorder (ready, nready);
11132 else if (nready > 2)
11133 qsort (ready, nready, sizeof (rtx), rank_for_reorder);
11136 /* Count life regions of r0 for a block. */
11137 static int
11138 find_r0_life_regions (basic_block b)
11140 rtx end, insn;
11141 rtx pset;
11142 rtx r0_reg;
11143 int live;
11144 int set;
11145 int death = 0;
11147 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11149 set = 1;
11150 live = 1;
11152 else
11154 set = 0;
11155 live = 0;
11158 insn = BB_HEAD (b);
11159 end = BB_END (b);
11160 r0_reg = gen_rtx_REG (SImode, R0_REG);
11161 while (1)
11163 if (INSN_P (insn))
11165 if (find_regno_note (insn, REG_DEAD, R0_REG))
11167 death++;
11168 live = 0;
11170 if (!live
11171 && (pset = single_set (insn))
11172 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11173 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11175 set++;
11176 live = 1;
11179 if (insn == end)
11180 break;
11181 insn = NEXT_INSN (insn);
11183 return set - death;
11186 /* Calculate regmode weights for all insns of all basic blocks. */
11187 static void
11188 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11189 int verbose ATTRIBUTE_UNUSED,
11190 int old_max_uid)
11192 basic_block b;
11194 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11195 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11196 r0_life_regions = 0;
11198 FOR_EACH_BB_REVERSE_FN (b, cfun)
11200 find_regmode_weight (b, SImode);
11201 find_regmode_weight (b, SFmode);
11202 if (!reload_completed)
11203 r0_life_regions += find_r0_life_regions (b);
11206 CURR_REGMODE_PRESSURE (SImode) = 0;
11207 CURR_REGMODE_PRESSURE (SFmode) = 0;
11210 /* Cleanup. */
11211 static void
11212 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11213 int verbose ATTRIBUTE_UNUSED)
11215 if (regmode_weight[0])
11217 free (regmode_weight[0]);
11218 regmode_weight[0] = NULL;
11220 if (regmode_weight[1])
11222 free (regmode_weight[1]);
11223 regmode_weight[1] = NULL;
11227 /* The set of supported scalar modes differs from the default in TImode
11228 for 32-bit SHMEDIA. */
11229 static bool
11230 sh_scalar_mode_supported_p (enum machine_mode mode)
11232 if (TARGET_SHMEDIA32 && mode == TImode)
11233 return false;
11235 return default_scalar_mode_supported_p (mode);
11238 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11239 keep count of register pressures on SImode and SFmode. */
11240 static int
11241 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11242 int sched_verbose ATTRIBUTE_UNUSED,
11243 rtx insn,
11244 int can_issue_more)
11246 if (GET_CODE (PATTERN (insn)) != USE
11247 && GET_CODE (PATTERN (insn)) != CLOBBER)
11248 cached_can_issue_more = can_issue_more - 1;
11249 else
11250 cached_can_issue_more = can_issue_more;
11252 if (reload_completed)
11253 return cached_can_issue_more;
11255 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11256 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11258 return cached_can_issue_more;
11261 static void
11262 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11263 int verbose ATTRIBUTE_UNUSED,
11264 int veclen ATTRIBUTE_UNUSED)
11266 CURR_REGMODE_PRESSURE (SImode) = 0;
11267 CURR_REGMODE_PRESSURE (SFmode) = 0;
11270 /* Some magic numbers. */
11271 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11272 functions that already have high pressure on r0. */
11273 #define R0_MAX_LIFE_REGIONS 2
11274 /* Register Pressure thresholds for SImode and SFmode registers. */
11275 #define SIMODE_MAX_WEIGHT 5
11276 #define SFMODE_MAX_WEIGHT 10
11278 /* Return true if the pressure is high for MODE. */
11279 static bool
11280 high_pressure (enum machine_mode mode)
11282 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11283 functions that already have high pressure on r0. */
11284 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11285 return true;
11287 if (mode == SFmode)
11288 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11289 else
11290 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11293 /* Reorder ready queue if register pressure is high. */
11294 static int
11295 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11296 int sched_verbose ATTRIBUTE_UNUSED,
11297 rtx *ready,
11298 int *n_readyp,
11299 int clock_var ATTRIBUTE_UNUSED)
11301 if (reload_completed)
11302 return sh_issue_rate ();
11304 if (high_pressure (SFmode) || high_pressure (SImode))
11306 ready_reorder (ready, *n_readyp);
11309 return sh_issue_rate ();
11312 /* Skip cycles if the current register pressure is high. */
11313 static int
11314 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11315 int sched_verbose ATTRIBUTE_UNUSED,
11316 rtx *ready ATTRIBUTE_UNUSED,
11317 int *n_readyp ATTRIBUTE_UNUSED,
11318 int clock_var ATTRIBUTE_UNUSED)
11320 if (reload_completed)
11321 return cached_can_issue_more;
11323 if (high_pressure (SFmode) || high_pressure (SImode))
11324 skip_cycles = 1;
11326 return cached_can_issue_more;
11329 /* Skip cycles without sorting the ready queue. This will move insns from
11330 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11331 queue by sh_reorder. */
11333 /* Generally, skipping this many cycles is sufficient for all insns to move
11334 from Q -> R. */
11335 #define MAX_SKIPS 8
11337 static int
11338 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11339 int sched_verbose ATTRIBUTE_UNUSED,
11340 rtx insn ATTRIBUTE_UNUSED,
11341 int last_clock_var,
11342 int clock_var,
11343 int *sort_p)
11345 if (reload_completed)
11346 return 0;
11348 if (skip_cycles)
11350 if ((clock_var - last_clock_var) < MAX_SKIPS)
11352 *sort_p = 0;
11353 return 1;
11355 /* If this is the last cycle we are skipping, allow reordering of R. */
11356 if ((clock_var - last_clock_var) == MAX_SKIPS)
11358 *sort_p = 1;
11359 return 1;
11363 skip_cycles = 0;
11365 return 0;
11368 /* SHmedia requires registers for branches, so we can't generate new
11369 branches past reload. */
11370 static bool
11371 sh_cannot_modify_jumps_p (void)
11373 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11376 static reg_class_t
11377 sh_target_reg_class (void)
11379 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11382 static bool
11383 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11385 if (! shmedia_space_reserved_for_target_registers)
11386 return 0;
11387 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11388 return 0;
11390 HARD_REG_SET dummy;
11391 if (calc_live_regs (&dummy) >= 6 * 8)
11392 return 1;
11393 return 0;
11396 static bool
11397 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11399 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11403 On the SH1..SH4, the trampoline looks like
11404 2 0002 D202 mov.l l2,r2
11405 1 0000 D301 mov.l l1,r3
11406 3 0004 422B jmp @r2
11407 4 0006 0009 nop
11408 5 0008 00000000 l1: .long area
11409 6 000c 00000000 l2: .long function
11411 SH5 (compact) uses r1 instead of r3 for the static chain. */
11414 /* Emit RTL insns to initialize the variable parts of a trampoline.
11415 FNADDR is an RTX for the address of the function's pure code.
11416 CXT is an RTX for the static chain value for the function. */
11417 static void
11418 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11420 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11421 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11423 if (TARGET_SHMEDIA64)
11425 rtx tramp_templ;
11426 int fixed_len;
11428 rtx movi1 = GEN_INT (0xcc000010);
11429 rtx shori1 = GEN_INT (0xc8000010);
11430 rtx src, dst;
11432 /* The following trampoline works within a +- 128 KB range for cxt:
11433 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11434 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11435 gettr tr1,r1; blink tr0,r63 */
11436 /* Address rounding makes it hard to compute the exact bounds of the
11437 offset for this trampoline, but we have a rather generous offset
11438 range, so frame_offset should do fine as an upper bound. */
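/* Added note, not in the original sources: the shift/mask arithmetic
   below appears to rely on the SHmedia movi/shori encoding, where the
   16-bit immediate sits in instruction bits 25..10, hence the mask
   0xffff << 10 == 0x3fffc00.  Placing fnaddr[63:48] in that field is
   (fnaddr >> 48) << 10, i.e. a single right shift by 38; the shifts by
   22 and 6 and the final left shift by 10 pick out the remaining three
   16-bit chunks.  The first word packs ((cxt - tramp) >> 2) << 10 into
   the displacement field of the ptb/u opcode word 0xec000010.  */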
11439 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11441 /* ??? could optimize this trampoline initialization
11442 by writing DImode words with two insns each. */
11443 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11444 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11445 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11446 insn = gen_rtx_AND (DImode, insn, mask);
11447 /* Or in ptb/u .,tr1 pattern */
11448 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11449 insn = force_operand (insn, NULL_RTX);
11450 insn = gen_lowpart (SImode, insn);
11451 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11452 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11453 insn = gen_rtx_AND (DImode, insn, mask);
11454 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11455 insn = gen_lowpart (SImode, insn);
11456 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11457 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11458 insn = gen_rtx_AND (DImode, insn, mask);
11459 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11460 insn = gen_lowpart (SImode, insn);
11461 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11462 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11463 insn = gen_rtx_AND (DImode, insn, mask);
11464 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11465 insn = gen_lowpart (SImode, insn);
11466 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11467 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11468 insn = gen_rtx_AND (DImode, insn, mask);
11469 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11470 insn = gen_lowpart (SImode, insn);
11471 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11472 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11473 GEN_INT (0x6bf10600));
11474 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11475 GEN_INT (0x4415fc10));
11476 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11477 GEN_INT (0x4401fff0));
11478 emit_insn (gen_ic_invalidate_line (tramp));
11479 return;
11481 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
11482 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11484 tramp_templ = gen_datalabel_ref (tramp_templ);
11485 dst = tramp_mem;
11486 src = gen_const_mem (BLKmode, tramp_templ);
11487 set_mem_align (dst, 256);
11488 set_mem_align (src, 64);
11489 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11491 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11492 emit_move_insn (adjust_address (tramp_mem, Pmode,
11493 fixed_len + GET_MODE_SIZE (Pmode)),
11494 cxt);
11495 emit_insn (gen_ic_invalidate_line (tramp));
11496 return;
11498 else if (TARGET_SHMEDIA)
11500 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11501 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11502 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11503 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11504 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
11505 rotated 10 right, and the higher 16 bits of every 32 selected. */
11506 rtx movishori
11507 = force_reg (V2HImode, (simplify_gen_subreg
11508 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11509 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11510 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11512 fnaddr = force_reg (SImode, fnaddr);
11513 cxt = force_reg (SImode, cxt);
11514 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11515 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11516 movishori));
11517 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11518 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11519 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11520 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11521 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11522 gen_rtx_SUBREG (V2HImode, cxt, 0),
11523 movishori));
11524 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11525 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11526 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11527 if (TARGET_LITTLE_ENDIAN)
11529 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11530 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11532 else
11534 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11535 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11537 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11538 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11539 emit_insn (gen_ic_invalidate_line (tramp));
11540 return;
11542 else if (TARGET_SHCOMPACT)
11544 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11545 return;
11547 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11548 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11549 SImode));
11550 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11551 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11552 SImode));
11553 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11554 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
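/* Added note, not in the original sources: the two SImode words written
   above pack the fixed part of the SH1..SH4 template shown earlier,
   i.e. 0xd202 (mov.l l2,r2) / 0xd301 (mov.l l1,r3) and 0x422b (jmp @r2)
   / 0x0009 (nop), in the order required by the target endianness.  The
   words stored at offsets 8 and 12 are l1 (the static chain CXT) and l2
   (the function address FNADDR) that those mov.l insns load.  */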
11555 if (TARGET_HARD_SH4 || TARGET_SH5)
11557 if (!TARGET_INLINE_IC_INVALIDATE
11558 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11559 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11560 FUNCTION_ORDINARY),
11561 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11562 else
11563 emit_insn (gen_ic_invalidate_line (tramp));
11567 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11568 static rtx
11569 sh_trampoline_adjust_address (rtx tramp)
11571 if (TARGET_SHMEDIA)
11572 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11573 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11574 return tramp;
11577 /* FIXME: This is overly conservative. A SHcompact function that
11578 receives arguments ``by reference'' will have them stored in its
11579 own stack frame, so it must not pass pointers or references to
11580 these arguments to other functions by means of sibling calls. */
11581 /* If PIC, we cannot make sibling calls to global functions
11582 because the PLT requires r12 to be live. */
11583 static bool
11584 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11586 return (1
11587 && (! TARGET_SHCOMPACT
11588 || crtl->args.info.stack_regs == 0)
11589 && ! sh_cfun_interrupt_handler_p ()
11590 && (! flag_pic
11591 || (decl && ! TREE_PUBLIC (decl))
11592 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11595 /* Machine specific built-in functions. */
11597 struct builtin_description
11599 bool (* const is_enabled) (void);
11600 const enum insn_code icode;
11601 const char *const name;
11602 int signature;
11603 tree fndecl;
11606 static bool
11607 shmedia_builtin_p (void)
11609 return TARGET_SHMEDIA;
11612 /* This function can be used if there are any built-ins that are not for
11613 SHmedia. It's commented out to avoid the defined-but-unused warning.
11614 static bool
11615 sh1_builtin_p (void)
11617 return TARGET_SH1;
11621 /* Describe number and signedness of arguments; arg[0] == result
11622 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11623 /* 9: 64-bit pointer, 10: 32-bit pointer. */
11624 static const char signature_args[][4] =
11626 #define SH_BLTIN_V2SI2 0
11627 { 4, 4 },
11628 #define SH_BLTIN_V4HI2 1
11629 { 4, 4 },
11630 #define SH_BLTIN_V2SI3 2
11631 { 4, 4, 4 },
11632 #define SH_BLTIN_V4HI3 3
11633 { 4, 4, 4 },
11634 #define SH_BLTIN_V8QI3 4
11635 { 4, 4, 4 },
11636 #define SH_BLTIN_MAC_HISI 5
11637 { 1, 4, 4, 1 },
11638 #define SH_BLTIN_SH_HI 6
11639 { 4, 4, 1 },
11640 #define SH_BLTIN_SH_SI 7
11641 { 4, 4, 1 },
11642 #define SH_BLTIN_V4HI2V2SI 8
11643 { 4, 4, 4 },
11644 #define SH_BLTIN_V4HI2V8QI 9
11645 { 4, 4, 4 },
11646 #define SH_BLTIN_SISF 10
11647 { 4, 2 },
11648 #define SH_BLTIN_LDUA_L 11
11649 { 2, 10 },
11650 #define SH_BLTIN_LDUA_Q 12
11651 { 1, 10 },
11652 #define SH_BLTIN_STUA_L 13
11653 { 0, 10, 2 },
11654 #define SH_BLTIN_STUA_Q 14
11655 { 0, 10, 1 },
11656 #define SH_BLTIN_LDUA_L64 15
11657 { 2, 9 },
11658 #define SH_BLTIN_LDUA_Q64 16
11659 { 1, 9 },
11660 #define SH_BLTIN_STUA_L64 17
11661 { 0, 9, 2 },
11662 #define SH_BLTIN_STUA_Q64 18
11663 { 0, 9, 1 },
11664 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11665 #define SH_BLTIN_2 19
11666 #define SH_BLTIN_SU 19
11667 { 1, 2 },
11668 #define SH_BLTIN_3 20
11669 #define SH_BLTIN_SUS 20
11670 { 2, 2, 1 },
11671 #define SH_BLTIN_PSSV 21
11672 { 0, 8, 2, 2 },
11673 #define SH_BLTIN_XXUU 22
11674 #define SH_BLTIN_UUUU 22
11675 { 1, 1, 1, 1 },
11676 #define SH_BLTIN_PV 23
11677 { 0, 8 },
11678 #define SH_BLTIN_VP 24
11679 { 8, 0 },
11681 /* mcmv: operands considered unsigned. */
11682 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11683 /* mperm: control value considered unsigned int. */
11684 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11685 /* mshards_q: returns signed short. */
11686 /* nsb: takes long long arg, returns unsigned char. */
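/* Added note, not in the original sources: each signature_args row
   reads as { result, arg1, arg2, arg3 }.  For example SH_BLTIN_SH_HI,
   { 4, 4, 1 }, covers builtins such as __builtin_sh_media_MPERM_W: the
   result and first operand use the insn's own vector mode with
   signedness "don't care" (4) and the shift/control operand is
   unsigned (1); a 0 entry means "no (further) argument".  Note also
   that several load/store builtins below are listed twice, once with a
   32-bit and once with a 64-bit pointer signature; sh_init_builtins
   skips the variant that does not match the selected SHmedia ABI, so
   only one decl per name is registered.  */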
11687 static struct builtin_description bdesc[] =
11689 { shmedia_builtin_p,
11690 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11691 { shmedia_builtin_p,
11692 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11693 { shmedia_builtin_p,
11694 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11695 { shmedia_builtin_p,
11696 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11697 { shmedia_builtin_p,
11698 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11699 { shmedia_builtin_p,
11700 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11701 { shmedia_builtin_p,
11702 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11703 { shmedia_builtin_p,
11704 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11705 { shmedia_builtin_p,
11706 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11707 { shmedia_builtin_p,
11708 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11709 { shmedia_builtin_p,
11710 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11711 { shmedia_builtin_p,
11712 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11713 { shmedia_builtin_p,
11714 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11715 { shmedia_builtin_p,
11716 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11717 { shmedia_builtin_p,
11718 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11719 { shmedia_builtin_p,
11720 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11721 { shmedia_builtin_p,
11722 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11723 { shmedia_builtin_p,
11724 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11725 { shmedia_builtin_p,
11726 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11727 { shmedia_builtin_p,
11728 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11729 { shmedia_builtin_p,
11730 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11731 { shmedia_builtin_p,
11732 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11733 { shmedia_builtin_p,
11734 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11735 { shmedia_builtin_p,
11736 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11737 { shmedia_builtin_p,
11738 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11739 { shmedia_builtin_p,
11740 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11741 { shmedia_builtin_p,
11742 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11743 { shmedia_builtin_p,
11744 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11745 { shmedia_builtin_p,
11746 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11747 { shmedia_builtin_p,
11748 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11749 { shmedia_builtin_p,
11750 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11751 { shmedia_builtin_p,
11752 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11753 { shmedia_builtin_p,
11754 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11755 { shmedia_builtin_p,
11756 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11757 { shmedia_builtin_p,
11758 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11759 { shmedia_builtin_p,
11760 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11761 { shmedia_builtin_p,
11762 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11763 { shmedia_builtin_p,
11764 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11765 { shmedia_builtin_p,
11766 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11767 { shmedia_builtin_p,
11768 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11769 { shmedia_builtin_p,
11770 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11771 { shmedia_builtin_p,
11772 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11773 { shmedia_builtin_p,
11774 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11775 { shmedia_builtin_p,
11776 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11777 { shmedia_builtin_p,
11778 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11779 { shmedia_builtin_p,
11780 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11781 { shmedia_builtin_p,
11782 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11783 { shmedia_builtin_p,
11784 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11785 { shmedia_builtin_p,
11786 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11787 { shmedia_builtin_p,
11788 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11789 { shmedia_builtin_p,
11790 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11791 { shmedia_builtin_p,
11792 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11793 { shmedia_builtin_p,
11794 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11795 { shmedia_builtin_p,
11796 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11797 { shmedia_builtin_p,
11798 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11799 { shmedia_builtin_p,
11800 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11801 { shmedia_builtin_p,
11802 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11803 { shmedia_builtin_p,
11804 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11805 { shmedia_builtin_p,
11806 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11807 { shmedia_builtin_p,
11808 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11809 { shmedia_builtin_p,
11810 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11811 { shmedia_builtin_p,
11812 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11813 { shmedia_builtin_p,
11814 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11815 { shmedia_builtin_p,
11816 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11817 { shmedia_builtin_p,
11818 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11819 { shmedia_builtin_p,
11820 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11821 { shmedia_builtin_p,
11822 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11823 { shmedia_builtin_p,
11824 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11825 { shmedia_builtin_p,
11826 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11827 { shmedia_builtin_p,
11828 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11829 { shmedia_builtin_p,
11830 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11831 { shmedia_builtin_p,
11832 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11833 { shmedia_builtin_p,
11834 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11835 { shmedia_builtin_p,
11836 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11837 { shmedia_builtin_p,
11838 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11839 { shmedia_builtin_p,
11840 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11841 { shmedia_builtin_p,
11842 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11843 { shmedia_builtin_p,
11844 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11845 { shmedia_builtin_p,
11846 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11847 { shmedia_builtin_p,
11848 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11849 { shmedia_builtin_p,
11850 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11851 { shmedia_builtin_p,
11852 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11853 { shmedia_builtin_p,
11854 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11857 static void
11858 sh_init_builtins (void)
11860 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11861 memset (shared, 0, sizeof shared);
11863 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11865 builtin_description* d = &bdesc[di];
11867 if (!d->is_enabled ())
11868 continue;
11870 tree type, arg_type = NULL_TREE;
11871 int signature = d->signature;
11873 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11874 type = shared[signature];
11875 else
11877 int has_result = signature_args[signature][0] != 0;
11878 tree args[3];
11880 if ((signature_args[signature][1] & 8)
11881 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11882 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11883 continue;
11884 if (! TARGET_FPU_ANY
11885 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11886 continue;
11887 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11888 args[i] = NULL_TREE;
11889 for (int i = 3; ; i--)
11891 int arg = signature_args[signature][i];
11892 int opno = i - 1 + has_result;
11894 if (arg & 8)
11895 arg_type = ptr_type_node;
11896 else if (arg)
11897 arg_type = (*lang_hooks.types.type_for_mode)
11898 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11899 else if (i)
11900 continue;
11901 else
11902 arg_type = void_type_node;
11903 if (i == 0)
11904 break;
11905 args[i-1] = arg_type;
11907 type = build_function_type_list (arg_type, args[0], args[1],
11908 args[2], NULL_TREE);
11909 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11910 shared[signature] = type;
11912 d->fndecl =
11913 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11914 NULL, NULL_TREE);
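/* Added note, not in the original sources: for a three-operand vector
   builtin such as __builtin_addv4hi3 (signature SH_BLTIN_V4HI3,
   { 4, 4, 4 }) the loop above effectively builds
   build_function_type_list (v4hi, v4hi, v4hi, NULL_TREE), with each
   type obtained via type_for_mode on the insn's operand modes; types
   for signatures below SH_BLTIN_NUM_SHARED_SIGNATURES are built once
   and reused through the `shared' cache.  */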
11918 /* Implements target hook vector_mode_supported_p. */
11919 bool
11920 sh_vector_mode_supported_p (enum machine_mode mode)
11922 if (TARGET_FPU_ANY
11923 && ((mode == V2SFmode)
11924 || (mode == V4SFmode)
11925 || (mode == V16SFmode)))
11926 return true;
11928 else if (TARGET_SHMEDIA
11929 && ((mode == V8QImode)
11930 || (mode == V2HImode)
11931 || (mode == V4HImode)
11932 || (mode == V2SImode)))
11933 return true;
11935 return false;
11938 bool
11939 sh_frame_pointer_required (void)
11941 /* If needed override this in other tm.h files to cope with various OS
11942 lossage requiring a frame pointer. */
11943 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11944 return true;
11946 if (crtl->profile)
11947 return true;
11949 return false;
11952 /* Implements target hook dwarf_calling_convention. Return an enum
11953 of dwarf_calling_convention. */
11955 sh_dwarf_calling_convention (const_tree func)
11957 if (sh_attr_renesas_p (func))
11958 return DW_CC_GNU_renesas_sh;
11960 return DW_CC_normal;
11963 /* Returns the sh builtin decl for CODE. */
11964 static tree
11965 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11967 if (code >= ARRAY_SIZE (bdesc))
11968 return error_mark_node;
11970 if (!bdesc[code].is_enabled ())
11971 return error_mark_node;
11973 return bdesc[code].fndecl;
11976 /* Expand an expression EXP that calls a built-in function,
11977 with result going to TARGET if that's convenient
11978 (and in mode MODE if that's convenient).
11979 SUBTARGET may be used as the target for computing one of EXP's operands.
11980 IGNORE is nonzero if the value is to be ignored. */
11981 static rtx
11982 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11983 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11985 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11986 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11987 const struct builtin_description *d = &bdesc[fcode];
11988 enum insn_code icode = d->icode;
11989 int signature = d->signature;
11990 int nop = 0;
11991 rtx op[4];
11993 if (signature_args[signature][0])
11995 if (ignore)
11996 return NULL_RTX;
11998 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11999 if (! target || GET_MODE (target) != tmode
12000 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12001 target = gen_reg_rtx (tmode);
12002 op[nop++] = target;
12004 else
12005 target = NULL_RTX;
12007 for (int i = 1; i <= 3; i++, nop++)
12009 tree arg;
12010 enum machine_mode opmode, argmode;
12011 tree optype;
12013 if (! signature_args[signature][i])
12014 break;
12015 arg = CALL_EXPR_ARG (exp, i - 1);
12016 if (arg == error_mark_node)
12017 return const0_rtx;
12018 if (signature_args[signature][i] & 8)
12020 opmode = ptr_mode;
12021 optype = ptr_type_node;
12023 else
12025 opmode = insn_data[icode].operand[nop].mode;
12026 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12028 argmode = TYPE_MODE (TREE_TYPE (arg));
12029 if (argmode != opmode)
12030 arg = build1 (NOP_EXPR, optype, arg);
12031 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12032 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12033 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12036 rtx pat = NULL_RTX;
12038 switch (nop)
12040 case 1:
12041 pat = (*insn_data[d->icode].genfun) (op[0]);
12042 break;
12043 case 2:
12044 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12045 break;
12046 case 3:
12047 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12048 break;
12049 case 4:
12050 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12051 break;
12052 default:
12053 gcc_unreachable ();
12055 if (! pat)
12056 return NULL_RTX;
12057 emit_insn (pat);
12058 return target;
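/* Added usage sketch, not in the original sources: from user code the
   SHmedia builtins expanded above are applied to GCC vector types,
   e.g.

     typedef short v4hi __attribute__ ((vector_size (8)));
     v4hi f (v4hi a, v4hi b) { return __builtin_addv4hi3 (a, b); }

   which reaches this function with signature SH_BLTIN_V4HI3 and emits
   the addv4hi3 insn pattern.  */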
12061 void
12062 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12064 rtx sel0 = const0_rtx;
12065 rtx sel1 = const1_rtx;
12066 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12067 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12069 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12070 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12073 void
12074 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12076 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12078 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12079 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12082 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12083 We can allow any mode in any general register. The special registers
12084 only allow SImode. Don't allow any mode in the PR.
12086 We cannot hold DCmode values in the XD registers because alter_reg
12087 handles subregs of them incorrectly. We could work around this by
12088 spacing the XD registers like the DR registers, but this would require
12089 additional memory in every compilation to hold larger register vectors.
12090 We could hold SFmode / SCmode values in XD registers, but that
12091 would require a tertiary reload when reloading from / to memory,
12092 and a secondary reload to reload from / to general regs; that
12093 seems to be a losing proposition.
12095 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12096 it won't be ferried through GP registers first. */
12097 bool
12098 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
12100 if (SPECIAL_REGISTER_P (regno))
12101 return mode == SImode;
12103 if (regno == FPUL_REG)
12104 return (mode == SImode || mode == SFmode);
12106 if (FP_REGISTER_P (regno) && mode == SFmode)
12107 return true;
12109 if (mode == V2SFmode)
12111 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12112 || GENERAL_REGISTER_P (regno)))
12113 return true;
12114 else
12115 return false;
12118 if (mode == V4SFmode)
12120 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12121 || GENERAL_REGISTER_P (regno))
12122 return true;
12123 else
12124 return false;
12127 if (mode == V16SFmode)
12129 if (TARGET_SHMEDIA)
12131 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12132 return true;
12133 else
12134 return false;
12136 else
12137 return regno == FIRST_XD_REG;
12140 if (FP_REGISTER_P (regno))
12142 if (mode == SFmode
12143 || mode == SImode
12144 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12145 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12146 || mode == DCmode
12147 || (TARGET_SHMEDIA
12148 && (mode == DFmode || mode == DImode
12149 || mode == V2SFmode || mode == TImode)))
12150 && ((regno - FIRST_FP_REG) & 1) == 0)
12151 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12152 && ((regno - FIRST_FP_REG) & 3) == 0))
12153 return true;
12154 else
12155 return false;
12158 if (XD_REGISTER_P (regno))
12159 return mode == DFmode;
12161 if (TARGET_REGISTER_P (regno))
12162 return (mode == DImode || mode == SImode || mode == PDImode);
12164 if (regno == PR_REG)
12165 return mode == SImode;
12167 if (regno == FPSCR_REG)
12168 return mode == PSImode;
12170 /* FIXME. This works around PR target/37633 for -O0. */
12171 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12173 unsigned int n = GET_MODE_SIZE (mode) / 8;
12175 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12176 && regno <= FIRST_GENERAL_REG + 14)
12177 return false;
12180 return true;
12183 /* Return the class of registers for which a mode change from FROM to TO
12184 is invalid. */
12185 bool
12186 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12187 enum reg_class rclass)
12189 /* We want to enable the use of SUBREGs as a means to
12190 VEC_SELECT a single element of a vector. */
12192 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12193 This can be problematic when SFmode vector subregs need to be accessed
12194 on the stack with displacement addressing, as it happens with -O0.
12195 Thus we disallow the mode change for -O0. */
12196 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12197 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12199 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12201 if (TARGET_LITTLE_ENDIAN)
12203 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12204 return reg_classes_intersect_p (DF_REGS, rclass);
12206 else
12208 if (GET_MODE_SIZE (from) < 8)
12209 return reg_classes_intersect_p (DF_REGS, rclass);
12212 return false;
12215 /* Return true if registers in machine mode MODE will likely be
12216 allocated to registers in small register classes. */
12217 bool
12218 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12220 return (! TARGET_SHMEDIA);
12223 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12224 that label is used. */
12225 void
12226 sh_mark_label (rtx address, int nuses)
12228 if (GOTOFF_P (address))
12230 /* Extract the label or symbol. */
12231 address = XEXP (address, 0);
12232 if (GET_CODE (address) == PLUS)
12233 address = XEXP (address, 0);
12234 address = XVECEXP (address, 0, 0);
12236 if (GET_CODE (address) == LABEL_REF
12237 && LABEL_P (XEXP (address, 0)))
12238 LABEL_NUSES (XEXP (address, 0)) += nuses;
12241 /* Compute extra cost of moving data between one register class
12242 and another.
12244 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12245 uses this information. Hence, the general register <-> floating point
12246 register information here is not used for SFmode. */
12247 static int
12248 sh_register_move_cost (enum machine_mode mode,
12249 reg_class_t srcclass, reg_class_t dstclass)
12251 if (dstclass == T_REGS || dstclass == PR_REGS)
12252 return 10;
12254 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12255 return 4;
12257 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12258 && REGCLASS_HAS_FP_REG (srcclass)
12259 && REGCLASS_HAS_FP_REG (dstclass))
12260 return 4;
12262 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12263 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12265 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12266 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12267 return 9;
12269 if ((REGCLASS_HAS_FP_REG (dstclass)
12270 && REGCLASS_HAS_GENERAL_REG (srcclass))
12271 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12272 && REGCLASS_HAS_FP_REG (srcclass)))
12274 /* Discourage trying to use fp regs for a pointer. This also
12275 discourages fp regs with SImode because Pmode is an alias
12276 of SImode on this target. See PR target/48596. */
12277 int addend = (mode == Pmode) ? 40 : 0;
12279 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12280 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12283 if ((dstclass == FPUL_REGS
12284 && REGCLASS_HAS_GENERAL_REG (srcclass))
12285 || (srcclass == FPUL_REGS
12286 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12287 return 5;
12289 if ((dstclass == FPUL_REGS
12290 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12291 || (srcclass == FPUL_REGS
12292 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12293 return 7;
12295 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12296 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12297 return 20;
12299 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12300 if (TARGET_SHMEDIA
12301 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12303 if (sh_gettrcost >= 0)
12304 return sh_gettrcost;
12305 else if (!TARGET_PT_FIXED)
12306 return 100;
12309 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12310 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12311 return 4;
12313 if (TARGET_SHMEDIA
12314 || (TARGET_FMOVD
12315 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12316 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12317 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12319 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12322 static rtx
12323 emit_load_ptr (rtx reg, rtx addr)
12325 rtx mem = gen_const_mem (ptr_mode, addr);
12327 if (Pmode != ptr_mode)
12328 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12329 return emit_move_insn (reg, mem);
12332 static void
12333 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12334 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12335 tree function)
12337 CUMULATIVE_ARGS cum;
12338 int structure_value_byref = 0;
12339 rtx this_rtx, this_value, sibcall, insns, funexp;
12340 tree funtype = TREE_TYPE (function);
12341 int simple_add = CONST_OK_FOR_ADD (delta);
12342 int did_load = 0;
12343 rtx scratch0, scratch1, scratch2;
12344 unsigned i;
12346 reload_completed = 1;
12347 epilogue_completed = 1;
12348 crtl->uses_only_leaf_regs = 1;
12350 emit_note (NOTE_INSN_PROLOGUE_END);
12352 /* Find the "this" pointer. We have such a wide range of ABIs for the
12353 SH that it's best to do this completely machine independently.
12354 "this" is passed as first argument, unless a structure return pointer
12355 comes first, in which case "this" comes second. */
12356 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12357 #ifndef PCC_STATIC_STRUCT_RETURN
12358 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12359 structure_value_byref = 1;
12360 #endif /* not PCC_STATIC_STRUCT_RETURN */
12361 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12363 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12365 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12367 this_rtx
12368 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12370 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12371 static chain pointer (even if you can't have nested virtual functions
12372 right now, someone might implement them sometime), and the rest of the
12373 registers are used for argument passing, are callee-saved, or reserved. */
12374 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12375 -ffixed-reg has been used. */
12376 if (! call_used_regs[0] || fixed_regs[0])
12377 error ("r0 needs to be available as a call-clobbered register");
12378 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12379 if (! TARGET_SH5)
12381 if (call_used_regs[1] && ! fixed_regs[1])
12382 scratch1 = gen_rtx_REG (ptr_mode, 1);
12383 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12384 to the location where struct values are to be returned. */
12385 if (call_used_regs[3] && ! fixed_regs[3])
12386 scratch2 = gen_rtx_REG (Pmode, 3);
12388 else if (TARGET_SHMEDIA)
12390 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12391 if (i != REGNO (scratch0) &&
12392 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12394 scratch1 = gen_rtx_REG (ptr_mode, i);
12395 break;
12397 if (scratch1 == scratch0)
12398 error ("need a second call-clobbered general purpose register");
12399 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12400 if (call_used_regs[i] && ! fixed_regs[i])
12402 scratch2 = gen_rtx_REG (Pmode, i);
12403 break;
12405 if (scratch2 == scratch0)
12406 error ("need a call-clobbered target register");
12409 this_value = plus_constant (Pmode, this_rtx, delta);
12410 if (vcall_offset
12411 && (simple_add || scratch0 != scratch1)
12412 && strict_memory_address_p (ptr_mode, this_value))
12414 emit_load_ptr (scratch0, this_value);
12415 did_load = 1;
12418 if (!delta)
12419 ; /* Do nothing. */
12420 else if (simple_add)
12421 emit_move_insn (this_rtx, this_value);
12422 else
12424 emit_move_insn (scratch1, GEN_INT (delta));
12425 emit_insn (gen_add2_insn (this_rtx, scratch1));
12428 if (vcall_offset)
12430 rtx offset_addr;
12432 if (!did_load)
12433 emit_load_ptr (scratch0, this_rtx);
12435 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12436 if (strict_memory_address_p (ptr_mode, offset_addr))
12437 ; /* Do nothing. */
12438 else if (! TARGET_SH5 && scratch0 != scratch1)
12440 /* scratch0 != scratch1, and we have indexed loads. Get better
12441 schedule by loading the offset into r1 and using an indexed
12442 load - then the load of r1 can issue before the load from
12443 (this_rtx + delta) finishes. */
12444 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12445 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12447 else if (CONST_OK_FOR_ADD (vcall_offset))
12449 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12450 offset_addr = scratch0;
12452 else if (scratch0 != scratch1)
12454 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12455 emit_insn (gen_add2_insn (scratch0, scratch1));
12456 offset_addr = scratch0;
12458 else
12459 gcc_unreachable (); /* FIXME */
12460 emit_load_ptr (scratch0, offset_addr);
12462 if (Pmode != ptr_mode)
12463 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12464 emit_insn (gen_add2_insn (this_rtx, scratch0));
12467 /* Generate a tail call to the target function. */
12468 if (! TREE_USED (function))
12470 assemble_external (function);
12471 TREE_USED (function) = 1;
12473 funexp = XEXP (DECL_RTL (function), 0);
12474 /* If the function is overridden, so is the thunk, hence we don't
12475 need GOT addressing even if this is a public symbol. */
12476 #if 0
12477 if (TARGET_SH1 && ! flag_weak)
12478 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12479 else
12480 #endif
12481 if (TARGET_SH2 && flag_pic)
12483 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12484 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12486 else
12488 if (TARGET_SHMEDIA && flag_pic)
12490 funexp = gen_sym2PIC (funexp);
12491 PUT_MODE (funexp, Pmode);
12493 emit_move_insn (scratch2, funexp);
12494 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12495 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12497 sibcall = emit_call_insn (sibcall);
12498 SIBLING_CALL_P (sibcall) = 1;
12499 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12500 emit_barrier ();
12502 /* Run just enough of rest_of_compilation to do scheduling and get
12503 the insns emitted. Note that use_thunk calls
12504 assemble_start_function and assemble_end_function. */
12506 insns = get_insns ();
12508 if (optimize > 0)
12510 if (! cfun->cfg)
12511 init_flow (cfun);
12512 split_all_insns_noflow ();
12515 sh_reorg ();
12516 shorten_branches (insns);
12517 final_start_function (insns, file, 1);
12518 final (insns, file, 1);
12519 final_end_function ();
12521 reload_completed = 0;
12522 epilogue_completed = 0;
12526 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12528 rtx sym;
12530 /* If this is not an ordinary function, the name usually comes from a
12531 string literal or an sprintf buffer. Make sure we use the same
12532 string consistently, so that cse will be able to unify address loads. */
12533 if (kind != FUNCTION_ORDINARY)
12534 name = IDENTIFIER_POINTER (get_identifier (name));
12535 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12536 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12537 if (flag_pic)
12538 switch (kind)
12540 case FUNCTION_ORDINARY:
12541 break;
12542 case SFUNC_GOT:
12544 rtx reg = target ? target : gen_reg_rtx (Pmode);
12546 emit_insn (gen_symGOT2reg (reg, sym));
12547 sym = reg;
12548 break;
12550 case SFUNC_STATIC:
12552 /* ??? To allow cse to work, we use GOTOFF relocations.
12553 We could add combiner patterns to transform this into
12554 straight pc-relative calls with sym2PIC / bsrf when
12555 label load and function call are still 1:1 and in the
12556 same basic block during combine. */
12557 rtx reg = target ? target : gen_reg_rtx (Pmode);
12559 emit_insn (gen_symGOTOFF2reg (reg, sym));
12560 sym = reg;
12561 break;
12564 if (target && sym != target)
12566 emit_move_insn (target, sym);
12567 return target;
12569 return sym;
12572 /* Find the number of a general purpose register in S. */
12573 static int
12574 scavenge_reg (HARD_REG_SET *s)
12576 int r;
12577 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12578 if (TEST_HARD_REG_BIT (*s, r))
12579 return r;
12580 return -1;
12584 sh_get_pr_initial_val (void)
12586 rtx val;
12588 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12589 PR register on SHcompact, because it might be clobbered by the prologue.
12590 We check first if that is known to be the case. */
12591 if (TARGET_SHCOMPACT
12592 && ((crtl->args.info.call_cookie
12593 & ~ CALL_COOKIE_RET_TRAMP (1))
12594 || crtl->saves_all_registers))
12595 return gen_frame_mem (SImode, return_address_pointer_rtx);
12597 /* If we haven't finished rtl generation, there might be a nonlocal label
12598 that we haven't seen yet.
12599 ??? get_hard_reg_initial_val fails if it is called after register
12600 allocation has started, unless it has been called before for the
12601 same register. And even then, we end up in trouble if we didn't use
12602 the register in the same basic block before. So call
12603 get_hard_reg_initial_val now and wrap it in an unspec if we might
12604 need to replace it. */
12605 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12606 combine can put the pseudo returned by get_hard_reg_initial_val into
12607 instructions that need a general purpose registers, which will fail to
12608 be recognized when the pseudo becomes allocated to PR. */
12610 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12611 if (TARGET_SH1)
12612 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12613 return val;
12616 bool
12617 sh_expand_t_scc (rtx operands[])
12619 enum rtx_code code = GET_CODE (operands[1]);
12620 rtx target = operands[0];
12621 rtx op0 = operands[2];
12622 rtx op1 = operands[3];
12623 rtx result = target;
12624 HOST_WIDE_INT val;
12626 if (!REG_P (op0) || REGNO (op0) != T_REG
12627 || !CONST_INT_P (op1))
12628 return false;
12629 if (!REG_P (result))
12630 result = gen_reg_rtx (SImode);
12631 val = INTVAL (op1);
12632 if ((code == EQ && val == 1) || (code == NE && val == 0))
12633 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12634 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12635 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12636 else if (code == EQ || code == NE)
12637 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12638 else
12639 return false;
12640 if (result != target)
12641 emit_move_insn (target, result);
12642 return true;
12645 /* INSN is an sfunc; return the rtx that describes the address used. */
12646 static rtx
12647 extract_sfunc_addr (rtx insn)
12649 rtx pattern, part = NULL_RTX;
12650 int len, i;
12652 pattern = PATTERN (insn);
12653 len = XVECLEN (pattern, 0);
12654 for (i = 0; i < len; i++)
12656 part = XVECEXP (pattern, 0, i);
12657 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12658 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12659 return XEXP (part, 0);
12661 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12662 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12665 /* Verify that the register in use_sfunc_addr still agrees with the address
12666 used in the sfunc. This prevents fill_slots_from_thread from changing
12667 use_sfunc_addr.
12668 INSN is the use_sfunc_addr instruction, and REG is the register it
12669 guards. */
12670 bool
12671 check_use_sfunc_addr (rtx insn, rtx reg)
12673 /* Search for the sfunc. It should really come right after INSN. */
12674 while ((insn = NEXT_INSN (insn)))
12676 if (LABEL_P (insn) || JUMP_P (insn))
12677 break;
12678 if (! INSN_P (insn))
12679 continue;
12681 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12682 insn = XVECEXP (PATTERN (insn), 0, 0);
12683 if (GET_CODE (PATTERN (insn)) != PARALLEL
12684 || get_attr_type (insn) != TYPE_SFUNC)
12685 continue;
12686 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12688 gcc_unreachable ();
12691 /* This function returns a constant rtx that represents 2**15 / pi in
12692 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12693 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12694 static GTY(()) rtx sh_fsca_sf2int_rtx;
12697 sh_fsca_sf2int (void)
12699 if (! sh_fsca_sf2int_rtx)
12701 REAL_VALUE_TYPE rv;
12703 real_from_string (&rv, "10430.378350470453");
12704 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12707 return sh_fsca_sf2int_rtx;
12710 /* This function returns a constant rtx that represents pi / 2**15 in
12711 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12712 of a full circle back to an SFmode value, in radians, i.e. 0x10000
12713 maps to 2*pi. */
12714 static GTY(()) rtx sh_fsca_int2sf_rtx;
12717 sh_fsca_int2sf (void)
12719 if (! sh_fsca_int2sf_rtx)
12721 REAL_VALUE_TYPE rv;
12723 real_from_string (&rv, "9.587379924285257e-5");
12724 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12727 return sh_fsca_int2sf_rtx;
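/* Added worked example, not in the original sources: the two constants
   above are exact reciprocals used around the fsca instruction, whose
   FPUL input measures angles in 2**16ths of a full circle.  An angle
   of pi/2 radians times 2**15/pi (10430.378...) gives 16384 = 0x4000,
   a quarter of 0x10000; multiplying 0x4000 by pi/2**15 (9.5874e-5)
   recovers approximately 1.5708 radians.  */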
12730 /* Initialize the CUMULATIVE_ARGS structure. */
12731 void
12732 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12733 tree fntype,
12734 rtx libname ATTRIBUTE_UNUSED,
12735 tree fndecl,
12736 signed int n_named_args,
12737 enum machine_mode mode)
12739 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12740 pcum->free_single_fp_reg = 0;
12741 pcum->stack_regs = 0;
12742 pcum->byref_regs = 0;
12743 pcum->byref = 0;
12744 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12746 /* XXX - Should we check TARGET_HITACHI here ??? */
12747 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12749 if (fntype)
12751 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12752 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12753 pcum->prototype_p = prototype_p (fntype);
12754 pcum->arg_count [(int) SH_ARG_INT]
12755 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12757 pcum->call_cookie
12758 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12759 && pcum->arg_count [(int) SH_ARG_INT] == 0
12760 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12761 ? int_size_in_bytes (TREE_TYPE (fntype))
12762 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12763 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12764 == FIRST_RET_REG));
12766 else
12768 pcum->arg_count [(int) SH_ARG_INT] = 0;
12769 pcum->prototype_p = FALSE;
12770 if (mode != VOIDmode)
12772 pcum->call_cookie =
12773 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12774 && GET_MODE_SIZE (mode) > 4
12775 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12777 /* If the default ABI is the Renesas ABI then all library
12778 calls must assume that the library will be using the
12779 Renesas ABI. So if the function would return its result
12780 in memory then we must force the address of this memory
12781 block onto the stack. Ideally we would like to call
12782 targetm.calls.return_in_memory() here but we do not have
12783 the TYPE or the FNDECL available so we synthesize the
12784 contents of that function as best we can. */
12785 pcum->force_mem =
12786 (TARGET_DEFAULT & MASK_HITACHI)
12787 && (mode == BLKmode
12788 || (GET_MODE_SIZE (mode) > 4
12789 && !(mode == DFmode
12790 && TARGET_FPU_DOUBLE)));
12792 else
12794 pcum->call_cookie = 0;
12795 pcum->force_mem = FALSE;
12800 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12801 not enter into CONST_DOUBLE for the replace.
12803 Note that copying is not done so X must not be shared unless all copies
12804 are to be modified.
12806 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12807 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12808 replacements[n*2+1] - and that we take mode changes into account.
12810 If a replacement is ambiguous, return NULL_RTX.
12812 If MODIFY is zero, don't modify any rtl in place,
12813 just return zero or nonzero for failure / success. */
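/* Added note, not in the original sources: REPLACEMENTS is laid out as
   consecutive pairs, so with N_REPLACEMENTS == 2 the array holds
   { FROM(0), TO(0), FROM(1), TO(1) }, and FROM(i) / TO(i) are read as
   replacements[i*2] / replacements[i*2+1], matching the loops below.  */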
12815 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12817 int i, j;
12818 const char *fmt;
12820 /* The following prevents loops from occurring when we change a MEM in a
12821 CONST_DOUBLE into the same CONST_DOUBLE. */
12822 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12823 return x;
12825 for (i = n_replacements - 1; i >= 0 ; i--)
12826 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12827 return replacements[i*2+1];
12829 /* Allow this function to make replacements in EXPR_LISTs. */
12830 if (x == NULL_RTX)
12831 return NULL_RTX;
12833 if (GET_CODE (x) == SUBREG)
12835 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12836 n_replacements, modify);
12838 if (CONST_INT_P (new_rtx))
12840 x = simplify_subreg (GET_MODE (x), new_rtx,
12841 GET_MODE (SUBREG_REG (x)),
12842 SUBREG_BYTE (x));
12843 if (! x)
12844 abort ();
12846 else if (modify)
12847 SUBREG_REG (x) = new_rtx;
12849 return x;
12851 else if (REG_P (x))
12853 unsigned regno = REGNO (x);
12854 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12855 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12856 rtx result = NULL_RTX;
12858 for (i = n_replacements - 1; i >= 0; i--)
12860 rtx from = replacements[i*2];
12861 rtx to = replacements[i*2+1];
12862 unsigned from_regno, from_nregs, to_regno, new_regno;
12864 if (!REG_P (from))
12865 continue;
12866 from_regno = REGNO (from);
12867 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12868 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12869 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12871 if (regno < from_regno
12872 || regno + nregs > from_regno + nregs
12873 || !REG_P (to)
12874 || result)
12875 return NULL_RTX;
12876 to_regno = REGNO (to);
12877 if (to_regno < FIRST_PSEUDO_REGISTER)
12879 new_regno = regno + to_regno - from_regno;
12880 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12881 != nregs)
12882 return NULL_RTX;
12883 result = gen_rtx_REG (GET_MODE (x), new_regno);
12885 else if (GET_MODE (x) <= GET_MODE (to))
12886 result = gen_lowpart_common (GET_MODE (x), to);
12887 else
12888 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12891 return result ? result : x;
12893 else if (GET_CODE (x) == ZERO_EXTEND)
12895 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12896 n_replacements, modify);
12898 if (CONST_INT_P (new_rtx))
12900 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12901 new_rtx, GET_MODE (XEXP (x, 0)));
12902 if (! x)
12903 abort ();
12905 else if (modify)
12906 XEXP (x, 0) = new_rtx;
12908 return x;
12911 fmt = GET_RTX_FORMAT (GET_CODE (x));
12912 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12914 rtx new_rtx;
12916 if (fmt[i] == 'e')
12918 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12919 n_replacements, modify);
12920 if (!new_rtx)
12921 return NULL_RTX;
12922 if (modify)
12923 XEXP (x, i) = new_rtx;
12925 else if (fmt[i] == 'E')
12926 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12928 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12929 n_replacements, modify);
12930 if (!new_rtx)
12931 return NULL_RTX;
12932 if (modify)
12933 XVECEXP (x, i, j) = new_rtx;
12937 return x;
12941 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12943 enum rtx_code code = TRUNCATE;
12945 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12947 rtx inner = XEXP (x, 0);
12948 enum machine_mode inner_mode = GET_MODE (inner);
12950 if (inner_mode == mode)
12951 return inner;
12952 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12953 x = inner;
12954 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12955 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12957 code = GET_CODE (x);
12958 x = inner;
12961 return gen_rtx_fmt_e (code, mode, x);
12964 /* Called via for_each_rtx after reload, to clean up truncates of
12965 registers that span multiple actual hard registers. */
12967 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12969 rtx x = *p, reg;
12971 if (GET_CODE (x) != TRUNCATE)
12972 return 0;
12973 reg = XEXP (x, 0);
12974 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12976 enum machine_mode reg_mode = GET_MODE (reg);
12977 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12978 subreg_lowpart_offset (DImode, reg_mode));
12979 *(int*) n_changes += 1;
12980 return -1;
12982 return 0;
12985 /* Load and store depend on the highpart of the address. However,
12986 set_attr_alternative does not give well-defined results before reload,
12987 so we must look at the rtl ourselves to see if any of the feeding
12988 registers is used in a memref.
12990 Called by sh_contains_memref_p via for_each_rtx. */
12991 static int
12992 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12994 return (MEM_P (*loc));
12997 /* Return true iff INSN contains a MEM. */
12998 bool
12999 sh_contains_memref_p (rtx insn)
13001 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
13004 /* Return true iff INSN loads a banked register. */
13005 bool
13006 sh_loads_bankedreg_p (rtx insn)
13008 if (GET_CODE (PATTERN (insn)) == SET)
13010 rtx op = SET_DEST (PATTERN (insn));
13011 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13012 return true;
13015 return false;
13018 /* FNADDR is the MEM expression from a call expander. Return an address
13019 to use in an SHmedia insn pattern. */
13021 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13023 int is_sym;
13025 fnaddr = XEXP (fnaddr, 0);
13026 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13027 if (flag_pic && is_sym)
13029 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13031 rtx reg = gen_reg_rtx (Pmode);
13033 /* We must not use GOTPLT for sibcalls, because PIC_REG
13034 must be restored before the PLT code gets to run. */
13035 if (is_sibcall)
13036 emit_insn (gen_symGOT2reg (reg, fnaddr));
13037 else
13038 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13039 fnaddr = reg;
13041 else
13043 fnaddr = gen_sym2PIC (fnaddr);
13044 PUT_MODE (fnaddr, Pmode);
13047 /* If ptabs might trap, make this visible to the rest of the compiler.
13048 We generally assume that symbols pertain to valid locations, but
13049 it is possible to generate invalid symbols with asm or linker tricks.
13050 In a list of functions where each returns its successor, an invalid
13051 symbol might denote an empty list. */
13052 if (!TARGET_PT_FIXED
13053 && (!is_sym || TARGET_INVALID_SYMBOLS)
13054 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13056 rtx tr = gen_reg_rtx (PDImode);
13058 emit_insn (gen_ptabs (tr, fnaddr));
13059 fnaddr = tr;
13061 else if (! target_reg_operand (fnaddr, Pmode))
13062 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13063 return fnaddr;
13066 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13067 static reg_class_t
13068 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13070 if (rclass == NO_REGS
13071 && TARGET_SHMEDIA
13072 && (CONST_DOUBLE_P (x)
13073 || GET_CODE (x) == SYMBOL_REF
13074 || PIC_ADDR_P (x)))
13075 return GENERAL_REGS;
13077 return rclass;
13080 /* Implement TARGET_SECONDARY_RELOAD. */
13081 static reg_class_t
13082 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13083 enum machine_mode mode, secondary_reload_info *sri)
13085 enum reg_class rclass = (enum reg_class) rclass_i;
13087 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13088 && REG_P (XEXP (XEXP (x, 0), 0))
13089 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13090 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13092 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13093 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13095 if (REG_P (x) && REGNO (x) == GBR_REG)
13096 return NO_REGS;
13098 if (in_p)
13100 if (REGCLASS_HAS_FP_REG (rclass)
13101 && ! TARGET_SHMEDIA
13102 && immediate_operand ((x), mode)
13103 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13104 && mode == SFmode && fldi_ok ()))
13105 switch (mode)
13107 case SFmode:
13108 sri->icode = CODE_FOR_reload_insf__frn;
13109 return NO_REGS;
13110 case DFmode:
13111 sri->icode = CODE_FOR_reload_indf__frn;
13112 return NO_REGS;
13113 case SImode:
13114 /* ??? If we knew that we are in the appropriate mode -
13115 single precision - we could use a reload pattern directly. */
13116 return FPUL_REGS;
13117 default:
13118 abort ();
13120 if (rclass == FPUL_REGS
13121 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13122 || REGNO (x) == T_REG))
13123 || GET_CODE (x) == PLUS))
13124 return GENERAL_REGS;
13125 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13127 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13128 return GENERAL_REGS;
13129 else if (mode == SFmode)
13130 return FP_REGS;
13131 sri->icode = CODE_FOR_reload_insi__i_fpul;
13132 return NO_REGS;
13134 if (rclass == FPSCR_REGS
13135 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13136 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13137 return GENERAL_REGS;
13138 if (REGCLASS_HAS_FP_REG (rclass)
13139 && TARGET_SHMEDIA
13140 && immediate_operand (x, mode)
13141 && x != CONST0_RTX (GET_MODE (x))
13142 && GET_MODE (x) != V4SFmode)
13143 return GENERAL_REGS;
13144 if ((mode == QImode || mode == HImode)
13145 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13147 sri->icode = ((mode == QImode)
13148 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13149 return NO_REGS;
13151 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13152 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13153 return TARGET_REGS;
13154 } /* end of input-only processing. */
13156 if (((REGCLASS_HAS_FP_REG (rclass)
13157 && (REG_P (x)
13158 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13159 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13160 && TARGET_FMOVD))))
13161 || (REGCLASS_HAS_GENERAL_REG (rclass)
13162 && REG_P (x)
13163 && FP_REGISTER_P (REGNO (x))))
13164 && ! TARGET_SHMEDIA
13165 && (mode == SFmode || mode == SImode))
13166 return FPUL_REGS;
13167 if ((rclass == FPUL_REGS
13168 || (REGCLASS_HAS_FP_REG (rclass)
13169 && ! TARGET_SHMEDIA && mode == SImode))
13170 && (MEM_P (x)
13171 || (REG_P (x)
13172 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13173 || REGNO (x) == T_REG
13174 || system_reg_operand (x, VOIDmode)))))
13176 if (rclass == FPUL_REGS)
13177 return GENERAL_REGS;
13178 return FPUL_REGS;
13180 if ((rclass == TARGET_REGS
13181 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13182 && !satisfies_constraint_Csy (x)
13183 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13184 return GENERAL_REGS;
13185 if ((rclass == MAC_REGS || rclass == PR_REGS)
13186 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13187 && rclass != REGNO_REG_CLASS (REGNO (x)))
13188 return GENERAL_REGS;
13189 if (rclass != GENERAL_REGS && REG_P (x)
13190 && TARGET_REGISTER_P (REGNO (x)))
13191 return GENERAL_REGS;
13193 /* If we get here, fall back to loading the FPUL register through general registers.
13194 This case can happen when movsi_ie insn is picked initially to
13195 load/store the FPUL register from/to another register, and then the
13196 other register is allocated on the stack. */
13197 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13198 return GENERAL_REGS;
13200 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13201 the other operand.
13202 On SH2A could also just leave it alone here, which would result in a
13203 4 byte move insn being generated instead. However, for this to work
13204 the insns must have the appropriate alternatives. */
13205 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13206 && satisfies_constraint_Sdd (x)
13207 && sh_disp_addr_displacement (x)
13208 <= sh_max_mov_insn_displacement (mode, false))
13209 return R0_REGS;
13211 /* When reload is trying to address a QImode or HImode subreg on the stack,
13212 force any subreg byte into R0_REGS, as this is going to become a
13213 displacement address.
13214 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13215 is on the stack, the memref to it might already require a displacement
13216 and that has to be added to the final address. At this point we don't
13217 know the cumulative displacement so we assume the worst case. */
13218 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13219 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13220 return R0_REGS;
13222 return NO_REGS;
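/* Conditional register usage: mark registers that do not exist on the
   selected CPU as fixed, make R8 / R9 call-clobbered for SH5, fix the PIC
   register when generating PIC code, keep the MAC registers call-saved for
   Renesas ABIs, and recompute the SIBCALL_REGS class. */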
13225 static void
13226 sh_conditional_register_usage (void)
13228 int regno;
13229 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13230 if (! VALID_REGISTER_P (regno))
13231 fixed_regs[regno] = call_used_regs[regno] = 1;
13232 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13233 if (TARGET_SH5)
13235 call_used_regs[FIRST_GENERAL_REG + 8]
13236 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13237 call_really_used_regs[FIRST_GENERAL_REG + 8]
13238 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13240 if (TARGET_SHMEDIA)
13242 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13243 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13244 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13246 if (flag_pic)
13248 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13249 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13251 /* Renesas saves and restores mac registers on call. */
13252 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13254 call_really_used_regs[MACH_REG] = 0;
13255 call_really_used_regs[MACL_REG] = 0;
13258 if (TARGET_SHMEDIA)
13260 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13261 if (! fixed_regs[regno] && call_really_used_regs[regno])
13262 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13264 else
13265 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13266 if (! fixed_regs[regno] && call_really_used_regs[regno])
13267 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13270 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13272 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13273 static bool
13274 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13276 return (TARGET_SHMEDIA
13277 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13278 || x == CONST0_RTX (mode)
13279 || !TARGET_SHMEDIA_FPU
13280 || TARGET_SHMEDIA64)
13281 : (GET_CODE (x) != CONST_DOUBLE
13282 || mode == DFmode || mode == SFmode
13283 || mode == DImode || GET_MODE (x) == VOIDmode));
13286 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
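/* Register the out-of-line __sync / atomic library functions for operations
   up to word size. */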
13288 static void
13289 sh_init_sync_libfuncs (void)
13291 init_sync_libfuncs (UNITS_PER_WORD);
13294 /* Return true if it is appropriate to emit `ret' instructions in the
13295 body of a function. */
13296 bool
13297 sh_can_use_simple_return_p (void)
13299 HARD_REG_SET live_regs_mask;
13300 int d;
13302 /* Some targets require special return insns. */
13303 if (TARGET_SHMEDIA
13304 || (TARGET_SHCOMPACT
13305 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13306 return false;
13308 if (! reload_completed || frame_pointer_needed)
13309 return false;
13311 /* Moving prologue around doesn't reduce the size. */
13312 if (optimize_function_for_size_p (cfun))
13313 return false;
13315 /* Finally, allow for pr save. */
13316 d = calc_live_regs (&live_regs_mask);
13318 if (rounded_frame_size (d) > 4)
13319 return false;
13321 return true;
13324 /*------------------------------------------------------------------------------
13325 Address mode optimization support code
13326 */
13328 typedef HOST_WIDE_INT disp_t;
13329 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13330 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13331 static const disp_t INVALID_DISP = MAX_DISP;
13333 /* A memory reference which is described by a base register and a
13334 displacement. */
13335 class base_reg_disp
13336 {
13337 public:
13338 base_reg_disp (rtx br, disp_t d);
13340 bool is_reg (void) const;
13341 bool is_disp (void) const;
13342 rtx reg (void) const;
13343 disp_t disp (void) const;
13345 private:
13346 rtx reg_;
13347 disp_t disp_;
13348 };
13350 inline
13351 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13352 : reg_ (br), disp_ (d)
13353 {
13354 }
13356 inline bool
13357 base_reg_disp::is_reg (void) const
13359 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13362 inline bool
13363 base_reg_disp::is_disp (void) const
13365 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13368 inline rtx
13369 base_reg_disp::reg (void) const
13371 return reg_;
13374 inline disp_t
13375 base_reg_disp::disp (void) const
13377 return disp_;
13380 /* Find the base register and calculate the displacement for a given
13381 address rtx 'x'.
13382 This is done by walking the insn list backwards and following SET insns
13383 that set the value of the specified reg 'x'. */
13384 static base_reg_disp
13385 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13387 if (REG_P (x))
13389 if (REGNO (x) == GBR_REG)
13390 return base_reg_disp (x, disp);
13392 /* We've reached a hard-reg. This is probably the point where
13393 function args are copied to pseudos. Do not go any further and
13394 stick to the pseudo. If the original mem addr was in a hard reg
13395 from the beginning, it will become the base reg. */
13396 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13397 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13399 /* Try to find the previous insn that sets the reg. */
13400 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13401 i = prev_nonnote_insn (i))
13403 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13404 && CALL_P (i))
13405 break;
13407 if (!NONJUMP_INSN_P (i))
13408 continue;
13410 rtx p = PATTERN (i);
13411 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13412 && REGNO (XEXP (p, 0)) == REGNO (x))
13414 /* If the recursion can't find out any more details about the
13415 source of the set, then this reg becomes our new base reg. */
13416 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13420 /* If we get here, no previous insn was found that sets the reg.
13421 The input reg is already the base reg. */
13422 return base_reg_disp (x, disp);
13425 else if (GET_CODE (x) == PLUS)
13427 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13428 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13430 /* Either left or right val must be a reg.
13431 We don't handle the case of 'reg + reg' here. */
13432 if (left_val.is_reg () && right_val.is_disp ())
13433 return base_reg_disp (left_val.reg (), left_val.disp ()
13434 + right_val.disp () + disp);
13435 else if (right_val.is_reg () && left_val.is_disp ())
13436 return base_reg_disp (right_val.reg (), right_val.disp ()
13437 + left_val.disp () + disp);
13438 else
13439 return base_reg_disp (base_reg, disp);
13442 else if (CONST_INT_P (x))
13443 return base_reg_disp (NULL, disp + INTVAL (x));
13445 /* Didn't find anything useful. */
13446 return base_reg_disp (base_reg, disp);
13449 /* Given an insn and a memory operand, try to find an equivalent GBR
13450 based memory address and return the corresponding new memory address.
13451 Return NULL_RTX if not found. */
13452 rtx
13453 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13455 if (!MEM_P (mem))
13456 return NULL_RTX;
13458 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13459 if (side_effects_p (XEXP (mem, 0)))
13460 return NULL_RTX;
13462 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13464 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13466 rtx disp = GEN_INT (gbr_disp.disp ());
13467 if (gbr_displacement (disp, GET_MODE (mem)))
13468 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13471 return NULL_RTX;
13474 /*------------------------------------------------------------------------------
13475 Manual insn combine support code.
13476 */
13478 /* Given a reg rtx and a start insn, try to find the insn that sets the
13479 specified reg by using the specified insn stepping function, such as
13480 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13481 of the reg set. */
13482 set_of_reg
13483 sh_find_set_of_reg (rtx reg, rtx insn, rtx (*stepfunc) (rtx))
13485 set_of_reg result;
13486 result.insn = insn;
13487 result.set_rtx = NULL_RTX;
13488 result.set_src = NULL_RTX;
13490 if (!REG_P (reg) || insn == NULL_RTX)
13491 return result;
13493 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13494 result.insn = stepfunc (result.insn))
13496 if (BARRIER_P (result.insn))
13497 return result;
13498 if (!NONJUMP_INSN_P (result.insn))
13499 continue;
13500 if (reg_set_p (reg, result.insn))
13502 result.set_rtx = set_of (reg, result.insn);
13504 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13505 return result;
13507 result.set_src = XEXP (result.set_rtx, 1);
13508 return result;
13512 return result;
13515 /* Given an op rtx and an insn, try to find out whether the result of the
13516 specified op consists only of logical operations on T bit stores. */
13517 bool
13518 sh_is_logical_t_store_expr (rtx op, rtx insn)
13520 if (!logical_operator (op, SImode))
13521 return false;
13523 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13524 int op_is_t_count = 0;
13526 for (int i = 0; i < 2; ++i)
13528 if (t_reg_operand (ops[i], VOIDmode)
13529 || negt_reg_operand (ops[i], VOIDmode))
13530 op_is_t_count++;
13532 else
13534 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13535 prev_nonnote_insn_bb);
13536 if (op_set.set_src == NULL_RTX)
13537 continue;
13539 if (t_reg_operand (op_set.set_src, VOIDmode)
13540 || negt_reg_operand (op_set.set_src, VOIDmode)
13541 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13542 op_is_t_count++;
13546 return op_is_t_count == 2;
13549 /* Given the operand that is extended in a sign/zero extend insn, and the
13550 insn, try to figure out whether the sign/zero extension can be replaced
13551 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13552 NULL_RTX otherwise. */
13553 rtx
13554 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13556 if (REG_P (extended_op))
13557 extended_op = extended_op; /* Use the reg as-is. */
13558 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13559 extended_op = SUBREG_REG (extended_op);
13560 else
13561 return NULL_RTX;
13563 /* Reg moves must be of the same mode. */
13564 if (GET_MODE (extended_op) != SImode)
13565 return NULL_RTX;
13567 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13568 if (s.set_src == NULL_RTX)
13569 return NULL_RTX;
13571 if (t_reg_operand (s.set_src, VOIDmode)
13572 || negt_reg_operand (s.set_src, VOIDmode))
13573 return extended_op;
13575 /* If the zero extended reg was formed by a logical operation, check the
13576 operands of the logical operation. If both originated from T bit
13577 stores the zero extension can be eliminated. */
13578 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13579 return extended_op;
13581 return NULL_RTX;
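/* Emit the insns that switch the FPSCR mode to MODE when the previously
   selected mode was PREV_MODE.  On SH4A-FPU and SH4-300, if the previous
   mode is known and differs, the PR bit (and the SZ bit when TARGET_FMOVD)
   is simply toggled; otherwise the new FPSCR value is loaded from memory. */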
13584 static void
13585 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
13586 int prev_mode, HARD_REG_SET regs_live)
13588 if ((TARGET_SH4A_FP || TARGET_SH4_300)
13589 && prev_mode != FP_MODE_NONE && prev_mode != mode)
13591 emit_insn (gen_toggle_pr ());
13592 if (TARGET_FMOVD)
13593 emit_insn (gen_toggle_sz ());
13595 else
13596 fpscr_set_from_mem (mode, regs_live);
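/* Return the FP mode required by INSN, or FP_MODE_NONE if the insn does not
   require a particular mode. */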
13599 static int
13600 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx insn)
13602 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
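/* Return the FP mode that is live after INSN.  For Renesas / Hitachi ABIs an
   insn may switch to a specific mode, indicated by its fp_set attribute;
   otherwise the mode is unchanged. */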
13605 static int
13606 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx insn)
13608 if (TARGET_HITACHI && recog_memoized (insn) >= 0
13609 && get_attr_fp_set (insn) != FP_SET_NONE)
13610 return (int) get_attr_fp_set (insn);
13611 else
13612 return mode;
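/* Return the FP mode assumed on entry to the current function. */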
13615 static int
13616 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
13618 return NORMAL_MODE (entity);
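/* Return the FP mode that must be in effect when the current function
   returns; functions with the renesas attribute need not restore a
   particular mode. */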
13621 static int
13622 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
13624 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
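/* Return the N-th FP mode in decreasing order of preference; the default
   precision of the selected FPU comes first. */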
13627 static int
13628 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
13630 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
13633 #include "gt-sh.h"