2012-10-18 Jan-Benedict Glaw <jbglaw@lug-owl.de>
[official-gcc.git] / gcc / config / sh / sh.c
blob4d65685a857d069df22cd98d21f209bf78fc3c88
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
4 Free Software Foundation, Inc.
5 Contributed by Steve Chamberlain (sac@cygnus.com).
6 Improved by Jim Wilson (wilson@cygnus.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
15 GCC is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
24 /* FIXME: This is a temporary hack, so that we can include <algorithm>
25 below. <algorithm> will try to include <cstdlib> which will reference
26 malloc & co, which are poisoned by "system.h". The proper solution is
27 to include <cstdlib> in "system.h" instead of <stdlib.h>. */
28 #include <cstdlib>
30 #include "config.h"
31 #include "system.h"
32 #include "coretypes.h"
33 #include "tm.h"
34 #include "insn-config.h"
35 #include "rtl.h"
36 #include "tree.h"
37 #include "flags.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "output.h"
45 #include "insn-attr.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "dwarf2.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "langhooks.h"
53 #include "basic-block.h"
54 #include "df.h"
55 #include "intl.h"
56 #include "sched-int.h"
57 #include "params.h"
58 #include "ggc.h"
59 #include "gimple.h"
60 #include "cfgloop.h"
61 #include "alloc-pool.h"
62 #include "tm-constrs.h"
63 #include "opts.h"
65 #include <sstream>
66 #include <vector>
67 #include <algorithm>
69 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
71 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
72 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
74 /* These are some macros to abstract register modes. */
75 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
76 && ((HOST_WIDE_INT)(VALUE)) <= 511)
78 #define CONST_OK_FOR_ADD(size) \
79 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
80 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
81 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
82 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
84 /* Used to simplify the logic below. Find the attributes wherever
85 they may be. */
86 #define SH_ATTRIBUTES(decl) \
87 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
88 : DECL_ATTRIBUTES (decl) \
89 ? (DECL_ATTRIBUTES (decl)) \
90 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
92 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
93 int current_function_interrupt;
95 tree sh_deferred_function_attributes;
96 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
98 /* Global variables for machine-dependent things. */
100 /* Which cpu are we scheduling for. */
101 enum processor_type sh_cpu;
103 /* Definitions used in ready queue reordering for first scheduling pass. */
105 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
106 static short *regmode_weight[2];
108 /* Total SFmode and SImode weights of scheduled insns. */
109 static int curr_regmode_pressure[2];
111 /* Number of r0 life regions. */
112 static int r0_life_regions;
114 /* If true, skip cycles for Q -> R movement. */
115 static int skip_cycles = 0;
117 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
118 and returned from sh_reorder2. */
119 static short cached_can_issue_more;
121 /* Unique number for UNSPEC_BBR pattern. */
122 static unsigned int unspec_bbr_uid = 1;
124 /* Provides the class number of the smallest class containing
125 reg number. */
127 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
129 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
162 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
163 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
164 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
165 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
166 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
167 GENERAL_REGS, GENERAL_REGS,
170 char sh_register_names[FIRST_PSEUDO_REGISTER] \
171 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
173 char sh_additional_register_names[ADDREGNAMES_SIZE] \
174 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
175 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
177 int assembler_dialect;
179 static bool shmedia_space_reserved_for_target_registers;
181 static void split_branches (rtx);
182 static int branch_dest (rtx);
183 static void force_into (rtx, rtx);
184 static void print_slot (rtx);
185 static rtx add_constant (rtx, enum machine_mode, rtx);
186 static void dump_table (rtx, rtx);
187 static bool broken_move (rtx);
188 static bool mova_p (rtx);
189 static rtx find_barrier (int, rtx, rtx);
190 static bool noncall_uses_reg (rtx, rtx, rtx *);
191 static rtx gen_block_redirect (rtx, int, int);
192 static void sh_reorg (void);
193 static void sh_option_override (void);
194 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
195 static rtx frame_insn (rtx);
196 static rtx push (int);
197 static void pop (int);
198 static void push_regs (HARD_REG_SET *, int);
199 static int calc_live_regs (HARD_REG_SET *);
200 static HOST_WIDE_INT rounded_frame_size (int);
201 static bool sh_frame_pointer_required (void);
202 static rtx mark_constant_pool_use (rtx);
203 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
204 static tree sh_handle_resbank_handler_attribute (tree *, tree,
205 tree, int, bool *);
206 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
207 tree, int, bool *);
208 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
209 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
210 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
211 static void sh_print_operand (FILE *, rtx, int);
212 static void sh_print_operand_address (FILE *, rtx);
213 static bool sh_print_operand_punct_valid_p (unsigned char code);
214 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
215 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
216 static void sh_insert_attributes (tree, tree *);
217 static const char *sh_check_pch_target_flags (int);
218 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
219 static int sh_adjust_cost (rtx, rtx, rtx, int);
220 static int sh_issue_rate (void);
221 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
222 static short find_set_regmode_weight (rtx, enum machine_mode);
223 static short find_insn_regmode_weight (rtx, enum machine_mode);
224 static void find_regmode_weight (basic_block, enum machine_mode);
225 static int find_r0_life_regions (basic_block);
226 static void sh_md_init_global (FILE *, int, int);
227 static void sh_md_finish_global (FILE *, int);
228 static int rank_for_reorder (const void *, const void *);
229 static void swap_reorder (rtx *, int);
230 static void ready_reorder (rtx *, int);
231 static bool high_pressure (enum machine_mode);
232 static int sh_reorder (FILE *, int, rtx *, int *, int);
233 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
234 static void sh_md_init (FILE *, int, int);
235 static int sh_variable_issue (FILE *, int, rtx, int);
237 static bool sh_function_ok_for_sibcall (tree, tree);
239 static bool sh_cannot_modify_jumps_p (void);
240 static reg_class_t sh_target_reg_class (void);
241 static bool sh_optimize_target_register_callee_saved (bool);
242 static bool sh_ms_bitfield_layout_p (const_tree);
244 static void sh_init_builtins (void);
245 static tree sh_builtin_decl (unsigned, bool);
246 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
247 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
248 static void sh_file_start (void);
249 static bool flow_dependent_p (rtx, rtx);
250 static void flow_dependent_p_1 (rtx, const_rtx, void *);
251 static int shiftcosts (rtx);
252 static int and_xor_ior_costs (rtx, int);
253 static int addsubcosts (rtx);
254 static int multcosts (rtx);
255 static bool unspec_caller_rtx_p (rtx);
256 static bool sh_cannot_copy_insn_p (rtx);
257 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
258 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
259 static int sh_pr_n_sets (void);
260 static rtx sh_allocate_initial_value (rtx);
261 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
262 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
263 enum machine_mode,
264 struct secondary_reload_info *);
265 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
266 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
267 static rtx sh_delegitimize_address (rtx);
268 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
269 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
270 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
271 static int scavenge_reg (HARD_REG_SET *s);
272 struct save_schedule_s;
273 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
274 struct save_schedule_s *, int);
276 static rtx sh_struct_value_rtx (tree, int);
277 static rtx sh_function_value (const_tree, const_tree, bool);
278 static bool sh_function_value_regno_p (const unsigned int);
279 static rtx sh_libcall_value (enum machine_mode, const_rtx);
280 static bool sh_return_in_memory (const_tree, const_tree);
281 static rtx sh_builtin_saveregs (void);
282 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode, tree, int *, int);
283 static bool sh_strict_argument_naming (cumulative_args_t);
284 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
285 static tree sh_build_builtin_va_list (void);
286 static void sh_va_start (tree, rtx);
287 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
288 static bool sh_promote_prototypes (const_tree);
289 static enum machine_mode sh_promote_function_mode (const_tree type,
290 enum machine_mode,
291 int *punsignedp,
292 const_tree funtype,
293 int for_return);
294 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
295 const_tree, bool);
296 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
297 const_tree, bool);
298 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
299 tree, bool);
300 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
301 const_tree, bool);
302 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
303 const_tree, bool);
304 static bool sh_scalar_mode_supported_p (enum machine_mode);
305 static int sh_dwarf_calling_convention (const_tree);
306 static void sh_encode_section_info (tree, rtx, int);
307 static bool sh2a_function_vector_p (tree);
308 static void sh_trampoline_init (rtx, tree, rtx);
309 static rtx sh_trampoline_adjust_address (rtx);
310 static void sh_conditional_register_usage (void);
311 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
312 static int mov_insn_size (enum machine_mode, bool);
313 static int max_mov_insn_displacement (enum machine_mode, bool);
314 static int mov_insn_alignment_mask (enum machine_mode, bool);
315 static HOST_WIDE_INT disp_addr_displacement (rtx);
316 static bool sequence_insn_p (rtx);
318 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
320 static const struct attribute_spec sh_attribute_table[] =
322 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
323 affects_type_identity } */
324 { "interrupt_handler", 0, 0, true, false, false,
325 sh_handle_interrupt_handler_attribute, false },
326 { "sp_switch", 1, 1, true, false, false,
327 sh_handle_sp_switch_attribute, false },
328 { "trap_exit", 1, 1, true, false, false,
329 sh_handle_trap_exit_attribute, false },
330 { "renesas", 0, 0, false, true, false,
331 sh_handle_renesas_attribute, false },
332 { "trapa_handler", 0, 0, true, false, false,
333 sh_handle_interrupt_handler_attribute, false },
334 { "nosave_low_regs", 0, 0, true, false, false,
335 sh_handle_interrupt_handler_attribute, false },
336 { "resbank", 0, 0, true, false, false,
337 sh_handle_resbank_handler_attribute, false },
338 { "function_vector", 1, 1, true, false, false,
339 sh2a_handle_function_vector_handler_attribute, false },
340 { NULL, 0, 0, false, false, false, NULL, false }
343 /* Initialize the GCC target structure. */
344 #undef TARGET_ATTRIBUTE_TABLE
345 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
347 /* The next two are used for debug info when compiling with -gdwarf. */
348 #undef TARGET_ASM_UNALIGNED_HI_OP
349 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
350 #undef TARGET_ASM_UNALIGNED_SI_OP
351 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
353 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
354 #undef TARGET_ASM_UNALIGNED_DI_OP
355 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
356 #undef TARGET_ASM_ALIGNED_DI_OP
357 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
359 #undef TARGET_OPTION_OVERRIDE
360 #define TARGET_OPTION_OVERRIDE sh_option_override
362 #undef TARGET_PRINT_OPERAND
363 #define TARGET_PRINT_OPERAND sh_print_operand
364 #undef TARGET_PRINT_OPERAND_ADDRESS
365 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
366 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
367 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_EPILOGUE
372 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
374 #undef TARGET_ASM_OUTPUT_MI_THUNK
375 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
377 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
378 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
380 #undef TARGET_ASM_FILE_START
381 #define TARGET_ASM_FILE_START sh_file_start
382 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
383 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
385 #undef TARGET_REGISTER_MOVE_COST
386 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
388 #undef TARGET_INSERT_ATTRIBUTES
389 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
391 #undef TARGET_SCHED_ADJUST_COST
392 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
394 #undef TARGET_SCHED_ISSUE_RATE
395 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
397 /* The next 5 hooks have been implemented for reenabling sched1. With the
398 help of these macros we are limiting the movement of insns in sched1 to
399 reduce the register pressure. The overall idea is to keep count of SImode
400 and SFmode regs required by already scheduled insns. When these counts
401 cross some threshold values; give priority to insns that free registers.
402 The insn that frees registers is most likely to be the insn with lowest
403 LUID (original insn order); but such an insn might be there in the stalled
404 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
405 up to a max of 8 cycles so that such insns may move from Q -> R.
407 The description of the hooks are as below:
409 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
410 scheduler; it is called inside the sched_init function just after
411 find_insn_reg_weights function call. It is used to calculate the SImode
412 and SFmode weights of insns of basic blocks; much similar to what
413 find_insn_reg_weights does.
414 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
416 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
417 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
418 (Q)->(R).
420 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
421 high; reorder the ready queue so that the insn with lowest LUID will be
422 issued next.
424 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
425 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
427 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
428 can be returned from TARGET_SCHED_REORDER2.
430 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
432 #undef TARGET_SCHED_DFA_NEW_CYCLE
433 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
435 #undef TARGET_SCHED_INIT_GLOBAL
436 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
438 #undef TARGET_SCHED_FINISH_GLOBAL
439 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
441 #undef TARGET_SCHED_VARIABLE_ISSUE
442 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
444 #undef TARGET_SCHED_REORDER
445 #define TARGET_SCHED_REORDER sh_reorder
447 #undef TARGET_SCHED_REORDER2
448 #define TARGET_SCHED_REORDER2 sh_reorder2
450 #undef TARGET_SCHED_INIT
451 #define TARGET_SCHED_INIT sh_md_init
453 #undef TARGET_DELEGITIMIZE_ADDRESS
454 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
456 #undef TARGET_LEGITIMIZE_ADDRESS
457 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
459 #undef TARGET_CANNOT_MODIFY_JUMPS_P
460 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
461 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
462 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
463 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
464 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
465 sh_optimize_target_register_callee_saved
467 #undef TARGET_MS_BITFIELD_LAYOUT_P
468 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
470 #undef TARGET_INIT_BUILTINS
471 #define TARGET_INIT_BUILTINS sh_init_builtins
472 #undef TARGET_BUILTIN_DECL
473 #define TARGET_BUILTIN_DECL sh_builtin_decl
474 #undef TARGET_EXPAND_BUILTIN
475 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
477 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
478 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
480 #undef TARGET_CANNOT_COPY_INSN_P
481 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
482 #undef TARGET_RTX_COSTS
483 #define TARGET_RTX_COSTS sh_rtx_costs
484 #undef TARGET_ADDRESS_COST
485 #define TARGET_ADDRESS_COST sh_address_cost
486 #undef TARGET_ALLOCATE_INITIAL_VALUE
487 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
489 #undef TARGET_MACHINE_DEPENDENT_REORG
490 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
492 #undef TARGET_DWARF_REGISTER_SPAN
493 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
495 #ifdef HAVE_AS_TLS
496 #undef TARGET_HAVE_TLS
497 #define TARGET_HAVE_TLS true
498 #endif
500 #undef TARGET_PROMOTE_PROTOTYPES
501 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
502 #undef TARGET_PROMOTE_FUNCTION_MODE
503 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
505 #undef TARGET_FUNCTION_VALUE
506 #define TARGET_FUNCTION_VALUE sh_function_value
507 #undef TARGET_FUNCTION_VALUE_REGNO_P
508 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
509 #undef TARGET_LIBCALL_VALUE
510 #define TARGET_LIBCALL_VALUE sh_libcall_value
511 #undef TARGET_STRUCT_VALUE_RTX
512 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
513 #undef TARGET_RETURN_IN_MEMORY
514 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
516 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
517 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
518 #undef TARGET_SETUP_INCOMING_VARARGS
519 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
520 #undef TARGET_STRICT_ARGUMENT_NAMING
521 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
522 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
523 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
524 #undef TARGET_MUST_PASS_IN_STACK
525 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
526 #undef TARGET_PASS_BY_REFERENCE
527 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
528 #undef TARGET_CALLEE_COPIES
529 #define TARGET_CALLEE_COPIES sh_callee_copies
530 #undef TARGET_ARG_PARTIAL_BYTES
531 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
532 #undef TARGET_FUNCTION_ARG
533 #define TARGET_FUNCTION_ARG sh_function_arg
534 #undef TARGET_FUNCTION_ARG_ADVANCE
535 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
537 #undef TARGET_BUILD_BUILTIN_VA_LIST
538 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
539 #undef TARGET_EXPAND_BUILTIN_VA_START
540 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
541 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
542 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
544 #undef TARGET_SCALAR_MODE_SUPPORTED_P
545 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
546 #undef TARGET_VECTOR_MODE_SUPPORTED_P
547 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
549 #undef TARGET_CHECK_PCH_TARGET_FLAGS
550 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
552 #undef TARGET_DWARF_CALLING_CONVENTION
553 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
555 #undef TARGET_FRAME_POINTER_REQUIRED
556 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
558 /* Return regmode weight for insn. */
559 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
560 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
562 /* Return current register pressure for regmode. */
563 #define CURR_REGMODE_PRESSURE(MODE)\
564 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
566 #undef TARGET_ENCODE_SECTION_INFO
567 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
569 #undef TARGET_SECONDARY_RELOAD
570 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
572 #undef TARGET_PREFERRED_RELOAD_CLASS
573 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
575 #undef TARGET_CONDITIONAL_REGISTER_USAGE
576 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
578 #undef TARGET_LEGITIMATE_ADDRESS_P
579 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
581 #undef TARGET_TRAMPOLINE_INIT
582 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
583 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
584 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
586 #undef TARGET_LEGITIMATE_CONSTANT_P
587 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
589 /* Machine-specific symbol_ref flags. */
590 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
592 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
593 is used by optabs.c atomic op expansion code as well as in sync.md. */
594 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
595 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
597 struct gcc_target targetm = TARGET_INITIALIZER;
600 /* Information on the currently selected atomic model.
601 This is initialized in sh_option_override. */
602 static sh_atomic_model selected_atomic_model_;
604 const sh_atomic_model&
605 selected_atomic_model (void)
607 return selected_atomic_model_;
610 static sh_atomic_model
611 parse_validate_atomic_model_option (const char* str)
613 const char* model_names[sh_atomic_model::num_models];
614 model_names[sh_atomic_model::none] = "none";
615 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
616 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
617 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
618 model_names[sh_atomic_model::soft_imask] = "soft-imask";
620 const char* model_cdef_names[sh_atomic_model::num_models];
621 model_cdef_names[sh_atomic_model::none] = "NONE";
622 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
623 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
624 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
625 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
627 sh_atomic_model ret;
628 ret.type = sh_atomic_model::none;
629 ret.name = model_names[sh_atomic_model::none];
630 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
631 ret.strict = false;
632 ret.tcb_gbr_offset = -1;
634 /* Handle empty string as 'none'. */
635 if (str == NULL || *str == '\0')
636 return ret;
638 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
640 std::vector<std::string> tokens;
641 for (std::stringstream ss (str); ss.good (); )
643 tokens.push_back (std::string ());
644 std::getline (ss, tokens.back (), ',');
647 if (tokens.empty ())
648 err_ret ("invalid atomic model option");
650 /* The first token must be the atomic model name. */
652 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
653 if (tokens.front () == model_names[i])
655 ret.type = (sh_atomic_model::enum_type)i;
656 ret.name = model_names[i];
657 ret.cdef_name = model_cdef_names[i];
658 goto got_mode_name;
661 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
662 got_mode_name:;
665 /* Go through the remaining tokens. */
666 for (size_t i = 1; i < tokens.size (); ++i)
668 if (tokens[i] == "strict")
669 ret.strict = true;
670 else if (tokens[i].find ("gbr-offset=") == 0)
672 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
673 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
674 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
675 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
676 "option", offset_str.c_str ());
678 else
679 err_ret ("unknown parameter \"%s\" in atomic model option",
680 tokens[i].c_str ());
683 /* Check that the selection makes sense. */
684 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
685 err_ret ("atomic operations are not supported on SHmedia");
687 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
688 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
689 ret.name);
691 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
692 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
694 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
695 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
697 if (ret.type == sh_atomic_model::soft_tcb
698 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
699 || (ret.tcb_gbr_offset & 3) != 0))
700 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
701 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
702 ret.name);
704 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
705 err_ret ("cannot use atomic model %s in user mode", ret.name);
707 return ret;
709 #undef err_ret
712 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
713 various options, and do some machine dependent initialization. */
714 static void
715 sh_option_override (void)
717 int regno;
719 SUBTARGET_OVERRIDE_OPTIONS;
720 if (optimize > 1 && !optimize_size)
721 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
722 sh_cpu = PROCESSOR_SH1;
723 assembler_dialect = 0;
724 if (TARGET_SH2)
725 sh_cpu = PROCESSOR_SH2;
726 if (TARGET_SH2E)
727 sh_cpu = PROCESSOR_SH2E;
728 if (TARGET_SH2A)
729 sh_cpu = PROCESSOR_SH2A;
730 if (TARGET_SH3)
731 sh_cpu = PROCESSOR_SH3;
732 if (TARGET_SH3E)
733 sh_cpu = PROCESSOR_SH3E;
734 if (TARGET_SH4)
736 assembler_dialect = 1;
737 sh_cpu = PROCESSOR_SH4;
739 if (TARGET_SH4A_ARCH)
741 assembler_dialect = 1;
742 sh_cpu = PROCESSOR_SH4A;
744 if (TARGET_SH5)
746 sh_cpu = PROCESSOR_SH5;
747 target_flags |= MASK_ALIGN_DOUBLE;
748 if (TARGET_SHMEDIA_FPU)
749 target_flags |= MASK_FMOVD;
750 if (TARGET_SHMEDIA)
752 /* There are no delay slots on SHmedia. */
753 flag_delayed_branch = 0;
754 /* Relaxation isn't yet supported for SHmedia */
755 target_flags &= ~MASK_RELAX;
756 /* After reload, if conversion does little good but can cause
757 ICEs:
758 - find_if_block doesn't do anything for SH because we don't
759 have conditional execution patterns. (We use conditional
760 move patterns, which are handled differently, and only
761 before reload).
762 - find_cond_trap doesn't do anything for the SH because we
763 don't have conditional traps.
764 - find_if_case_1 uses redirect_edge_and_branch_force in
765 the only path that does an optimization, and this causes
766 an ICE when branch targets are in registers.
767 - find_if_case_2 doesn't do anything for the SHmedia after
768 reload except when it can redirect a tablejump - and
769 that's rather rare. */
770 flag_if_conversion2 = 0;
771 if (! strcmp (sh_div_str, "call"))
772 sh_div_strategy = SH_DIV_CALL;
773 else if (! strcmp (sh_div_str, "call2"))
774 sh_div_strategy = SH_DIV_CALL2;
775 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
776 sh_div_strategy = SH_DIV_FP;
777 else if (! strcmp (sh_div_str, "inv"))
778 sh_div_strategy = SH_DIV_INV;
779 else if (! strcmp (sh_div_str, "inv:minlat"))
780 sh_div_strategy = SH_DIV_INV_MINLAT;
781 else if (! strcmp (sh_div_str, "inv20u"))
782 sh_div_strategy = SH_DIV_INV20U;
783 else if (! strcmp (sh_div_str, "inv20l"))
784 sh_div_strategy = SH_DIV_INV20L;
785 else if (! strcmp (sh_div_str, "inv:call2"))
786 sh_div_strategy = SH_DIV_INV_CALL2;
787 else if (! strcmp (sh_div_str, "inv:call"))
788 sh_div_strategy = SH_DIV_INV_CALL;
789 else if (! strcmp (sh_div_str, "inv:fp"))
791 if (TARGET_FPU_ANY)
792 sh_div_strategy = SH_DIV_INV_FP;
793 else
794 sh_div_strategy = SH_DIV_INV;
796 TARGET_CBRANCHDI4 = 0;
797 /* Assembler CFI isn't yet fully supported for SHmedia. */
798 flag_dwarf2_cfi_asm = 0;
801 else
803 /* Only the sh64-elf assembler fully supports .quad properly. */
804 targetm.asm_out.aligned_op.di = NULL;
805 targetm.asm_out.unaligned_op.di = NULL;
807 if (TARGET_SH1)
809 if (! strcmp (sh_div_str, "call-div1"))
810 sh_div_strategy = SH_DIV_CALL_DIV1;
811 else if (! strcmp (sh_div_str, "call-fp")
812 && (TARGET_FPU_DOUBLE
813 || (TARGET_HARD_SH4 && TARGET_SH2E)
814 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
815 sh_div_strategy = SH_DIV_CALL_FP;
816 else if (! strcmp (sh_div_str, "call-table") && TARGET_SH2)
817 sh_div_strategy = SH_DIV_CALL_TABLE;
818 else
819 /* Pick one that makes most sense for the target in general.
820 It is not much good to use different functions depending
821 on -Os, since then we'll end up with two different functions
822 when some of the code is compiled for size, and some for
823 speed. */
825 /* SH4 tends to emphasize speed. */
826 if (TARGET_HARD_SH4)
827 sh_div_strategy = SH_DIV_CALL_TABLE;
828 /* These have their own way of doing things. */
829 else if (TARGET_SH2A)
830 sh_div_strategy = SH_DIV_INTRINSIC;
831 /* ??? Should we use the integer SHmedia function instead? */
832 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
833 sh_div_strategy = SH_DIV_CALL_FP;
834 /* SH1 .. SH3 cores often go into small-footprint systems, so
835 default to the smallest implementation available. */
836 else if (TARGET_SH2) /* ??? EXPERIMENTAL */
837 sh_div_strategy = SH_DIV_CALL_TABLE;
838 else
839 sh_div_strategy = SH_DIV_CALL_DIV1;
841 if (!TARGET_SH1)
842 TARGET_PRETEND_CMOVE = 0;
843 if (sh_divsi3_libfunc[0])
844 ; /* User supplied - leave it alone. */
845 else if (TARGET_DIVIDE_CALL_FP)
846 sh_divsi3_libfunc = "__sdivsi3_i4";
847 else if (TARGET_DIVIDE_CALL_TABLE)
848 sh_divsi3_libfunc = "__sdivsi3_i4i";
849 else if (TARGET_SH5)
850 sh_divsi3_libfunc = "__sdivsi3_1";
851 else
852 sh_divsi3_libfunc = "__sdivsi3";
853 if (sh_branch_cost == -1)
855 sh_branch_cost = 1;
857 /* The SH1 does not have delay slots, hence we get a pipeline stall
858 at every branch. The SH4 is superscalar, so the single delay slot
859 is not sufficient to keep both pipelines filled. */
860 if (! TARGET_SH2 || TARGET_HARD_SH4)
861 sh_branch_cost = 2;
864 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
865 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
866 TARGET_ZDCBRANCH = 1;
868 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
869 if (! VALID_REGISTER_P (regno))
870 sh_register_names[regno][0] = '\0';
872 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
873 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
874 sh_additional_register_names[regno][0] = '\0';
876 if ((flag_pic && ! TARGET_PREFERGOT)
877 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
878 flag_no_function_cse = 1;
880 if (targetm.small_register_classes_for_mode_p (VOIDmode)) \
882 /* Never run scheduling before reload, since that can
883 break global alloc, and generates slower code anyway due
884 to the pressure on R0. */
885 /* Enable sched1 for SH4 if the user explicitly requests.
886 When sched1 is enabled, the ready queue will be reordered by
887 the target hooks if pressure is high. We can not do this for
888 PIC, SH3 and lower as they give spill failures for R0. */
889 if (!TARGET_HARD_SH4 || flag_pic)
890 flag_schedule_insns = 0;
891 /* ??? Current exception handling places basic block boundaries
892 after call_insns. It causes the high pressure on R0 and gives
893 spill failures for R0 in reload. See PR 22553 and the thread
894 on gcc-patches
895 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
896 else if (flag_exceptions)
898 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
899 warning (0, "ignoring -fschedule-insns because of exception handling bug");
900 flag_schedule_insns = 0;
902 else if (flag_schedule_insns
903 && !global_options_set.x_flag_schedule_insns)
904 flag_schedule_insns = 0;
907 /* Unwind info is not correct around the CFG unless either a frame
908 pointer is present or M_A_O_A is set. Fixing this requires rewriting
909 unwind info generation to be aware of the CFG and propagating states
910 around edges. */
911 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
912 || flag_exceptions || flag_non_call_exceptions)
913 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
915 warning (0, "unwind tables currently require either a frame pointer "
916 "or -maccumulate-outgoing-args for correctness");
917 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
920 /* Unwinding with -freorder-blocks-and-partition does not work on this
921 architecture, because it requires far jumps to label crossing between
922 hot/cold sections which are rejected on this architecture. */
923 if (flag_reorder_blocks_and_partition)
925 if (flag_exceptions)
927 inform (input_location,
928 "-freorder-blocks-and-partition does not work with "
929 "exceptions on this architecture");
930 flag_reorder_blocks_and_partition = 0;
931 flag_reorder_blocks = 1;
933 else if (flag_unwind_tables)
935 inform (input_location,
936 "-freorder-blocks-and-partition does not support unwind "
937 "info on this architecture");
938 flag_reorder_blocks_and_partition = 0;
939 flag_reorder_blocks = 1;
943 /* Adjust loop, jump and function alignment values (in bytes), if those
944 were not specified by the user using -falign-loops, -falign-jumps
945 and -falign-functions options.
946 32 bit alignment is better for speed, because instructions can be
947 fetched as a pair from a longword boundary. For size use 16 bit
948 alignment to get more compact code.
949 Aligning all jumps increases the code size, even if it might
950 result in slightly faster code. Thus, it is set to the smallest
951 alignment possible if not specified by the user. */
952 if (align_loops == 0)
954 if (TARGET_SH5)
955 align_loops = 8;
956 else
957 align_loops = optimize_size ? 2 : 4;
960 if (align_jumps == 0)
962 if (TARGET_SHMEDIA)
963 align_jumps = 1 << CACHE_LOG;
964 else
965 align_jumps = 2;
967 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
968 align_jumps = TARGET_SHMEDIA ? 4 : 2;
970 if (align_functions == 0)
972 if (TARGET_SHMEDIA)
973 align_functions = optimize_size
974 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
975 else
976 align_functions = optimize_size ? 2 : 4;
979 /* The linker relaxation code breaks when a function contains
980 alignments that are larger than that at the start of a
981 compilation unit. */
982 if (TARGET_RELAX)
984 int min_align
985 = align_loops > align_jumps ? align_loops : align_jumps;
987 /* Also take possible .long constants / mova tables int account. */
988 if (min_align < 4)
989 min_align = 4;
990 if (align_functions < min_align)
991 align_functions = min_align;
994 if (flag_unsafe_math_optimizations)
996 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
997 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
998 TARGET_FSCA = 1;
1000 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1001 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1002 TARGET_FSRRA = 1;
1005 /* Allow fsrra insn only if -funsafe-math-optimizations and
1006 -ffinite-math-only is enabled. */
1007 TARGET_FSRRA = TARGET_FSRRA
1008 && flag_unsafe_math_optimizations
1009 && flag_finite_math_only;
1011 /* If the -mieee option was not explicitly set by the user, turn it on
1012 unless -ffinite-math-only was specified. See also PR 33135. */
1013 if (! global_options_set.x_TARGET_IEEE)
1014 TARGET_IEEE = ! flag_finite_math_only;
1016 if (sh_fixed_range_str)
1017 sh_fix_range (sh_fixed_range_str);
1019 /* This target defaults to strict volatile bitfields. */
1020 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1021 flag_strict_volatile_bitfields = 1;
1023 /* Parse atomic model option and make sure it is valid for the current
1024 target CPU. */
1025 selected_atomic_model_
1026 = parse_validate_atomic_model_option (sh_atomic_model_str);
1029 /* Print the operand address in x to the stream. */
1031 static void
1032 sh_print_operand_address (FILE *stream, rtx x)
1034 switch (GET_CODE (x))
1036 case REG:
1037 case SUBREG:
1038 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1039 break;
1041 case PLUS:
1043 rtx base = XEXP (x, 0);
1044 rtx index = XEXP (x, 1);
1046 switch (GET_CODE (index))
1048 case CONST_INT:
1049 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1050 reg_names[true_regnum (base)]);
1051 break;
1053 case REG:
1054 case SUBREG:
1056 int base_num = true_regnum (base);
1057 int index_num = true_regnum (index);
1059 fprintf (stream, "@(r0,%s)",
1060 reg_names[MAX (base_num, index_num)]);
1061 break;
1064 default:
1065 gcc_unreachable ();
1068 break;
1070 case PRE_DEC:
1071 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1072 break;
1074 case POST_INC:
1075 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1076 break;
1078 default:
1079 x = mark_constant_pool_use (x);
1080 output_addr_const (stream, x);
1081 break;
1085 /* Print operand x (an rtx) in assembler syntax to file stream
1086 according to modifier code.
1088 '.' print a .s if insn needs delay slot
1089 ',' print LOCAL_LABEL_PREFIX
1090 '@' print trap, rte or rts depending upon pragma interruptness
1091 '#' output a nop if there is nothing to put in the delay slot
1092 ''' print likelihood suffix (/u for unlikely).
1093 '>' print branch target if -fverbose-asm
1094 'O' print a constant without the #
1095 'R' print the LSW of a dp value - changes if in little endian
1096 'S' print the MSW of a dp value - changes if in little endian
1097 'T' print the next word of a dp value - same as 'R' in big endian mode.
1098 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1099 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1100 'N' print 'r63' if the operand is (const_int 0).
1101 'd' print a V2SF reg as dN instead of fpN.
1102 'm' print a pair `base,offset' or `base,index', for LD and ST.
1103 'U' Likewise for {LD,ST}{HI,LO}.
1104 'V' print the position of a single bit set.
1105 'W' print the position of a single bit cleared.
1106 't' print a memory address which is a register.
1107 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1108 'o' output an operator. */
1110 static void
1111 sh_print_operand (FILE *stream, rtx x, int code)
1113 int regno;
1114 enum machine_mode mode;
1116 switch (code)
1118 tree trapa_attr;
1120 case '.':
1121 if (final_sequence
1122 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1123 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1124 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1125 break;
1126 case ',':
1127 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1128 break;
1129 case '@':
1130 trapa_attr = lookup_attribute ("trap_exit",
1131 DECL_ATTRIBUTES (current_function_decl));
1132 if (trapa_attr)
1133 fprintf (stream, "trapa #%ld",
1134 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1135 else if (sh_cfun_interrupt_handler_p ())
1137 if (sh_cfun_resbank_handler_p ())
1138 fprintf (stream, "resbank\n");
1139 fprintf (stream, "rte");
1141 else
1142 fprintf (stream, "rts");
1143 break;
1144 case '#':
1145 /* Output a nop if there's nothing in the delay slot. */
1146 if (dbr_sequence_length () == 0)
1147 fprintf (stream, "\n\tnop");
1148 break;
1149 case '\'':
1151 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1153 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1154 fputs ("/u", stream);
1155 break;
1157 case '>':
1158 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1160 fputs ("\t! target: ", stream);
1161 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1163 break;
1164 case 'O':
1165 x = mark_constant_pool_use (x);
1166 output_addr_const (stream, x);
1167 break;
1168 /* N.B.: %R / %S / %T adjust memory addresses by four.
1169 For SHMEDIA, that means they can be used to access the first and
1170 second 32 bit part of a 64 bit (or larger) value that
1171 might be held in floating point registers or memory.
1172 While they can be used to access 64 bit parts of a larger value
1173 held in general purpose registers, that won't work with memory -
1174 neither for fp registers, since the frxx names are used. */
1175 case 'R':
1176 if (REG_P (x) || GET_CODE (x) == SUBREG)
1178 regno = true_regnum (x);
1179 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1180 fputs (reg_names[regno], (stream));
1182 else if (MEM_P (x))
1184 x = adjust_address (x, SImode, 4 * LSW);
1185 sh_print_operand_address (stream, XEXP (x, 0));
1187 else
1189 rtx sub = NULL_RTX;
1191 mode = GET_MODE (x);
1192 if (mode == VOIDmode)
1193 mode = DImode;
1194 if (GET_MODE_SIZE (mode) >= 8)
1195 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1196 if (sub)
1197 sh_print_operand (stream, sub, 0);
1198 else
1199 output_operand_lossage ("invalid operand to %%R");
1201 break;
1202 case 'S':
1203 if (REG_P (x) || GET_CODE (x) == SUBREG)
1205 regno = true_regnum (x);
1206 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1207 fputs (reg_names[regno], (stream));
1209 else if (MEM_P (x))
1211 x = adjust_address (x, SImode, 4 * MSW);
1212 sh_print_operand_address (stream, XEXP (x, 0));
1214 else
1216 rtx sub = NULL_RTX;
1218 mode = GET_MODE (x);
1219 if (mode == VOIDmode)
1220 mode = DImode;
1221 if (GET_MODE_SIZE (mode) >= 8)
1222 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1223 if (sub)
1224 sh_print_operand (stream, sub, 0);
1225 else
1226 output_operand_lossage ("invalid operand to %%S");
1228 break;
1229 case 'T':
1230 /* Next word of a double. */
1231 switch (GET_CODE (x))
1233 case REG:
1234 fputs (reg_names[REGNO (x) + 1], (stream));
1235 break;
1236 case MEM:
1237 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1238 && GET_CODE (XEXP (x, 0)) != POST_INC)
1239 x = adjust_address (x, SImode, 4);
1240 sh_print_operand_address (stream, XEXP (x, 0));
1241 break;
1242 default:
1243 break;
1245 break;
1247 case 't':
1248 gcc_assert (MEM_P (x));
1249 x = XEXP (x, 0);
1250 switch (GET_CODE (x))
1252 case REG:
1253 case SUBREG:
1254 sh_print_operand (stream, x, 0);
1255 break;
1256 default:
1257 break;
1259 break;
1261 case 'o':
1262 switch (GET_CODE (x))
1264 case PLUS: fputs ("add", stream); break;
1265 case MINUS: fputs ("sub", stream); break;
1266 case MULT: fputs ("mul", stream); break;
1267 case DIV: fputs ("div", stream); break;
1268 case EQ: fputs ("eq", stream); break;
1269 case NE: fputs ("ne", stream); break;
1270 case GT: case LT: fputs ("gt", stream); break;
1271 case GE: case LE: fputs ("ge", stream); break;
1272 case GTU: case LTU: fputs ("gtu", stream); break;
1273 case GEU: case LEU: fputs ("geu", stream); break;
1274 default:
1275 break;
1277 break;
1278 case 'M':
1279 if (TARGET_SHMEDIA)
1281 if (MEM_P (x)
1282 && GET_CODE (XEXP (x, 0)) == PLUS
1283 && (REG_P (XEXP (XEXP (x, 0), 1))
1284 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1285 fputc ('x', stream);
1287 else
1289 if (MEM_P (x))
1291 switch (GET_MODE (x))
1293 case QImode: fputs (".b", stream); break;
1294 case HImode: fputs (".w", stream); break;
1295 case SImode: fputs (".l", stream); break;
1296 case SFmode: fputs (".s", stream); break;
1297 case DFmode: fputs (".d", stream); break;
1298 default: gcc_unreachable ();
1302 break;
1304 case 'm':
1305 gcc_assert (MEM_P (x));
1306 x = XEXP (x, 0);
1307 /* Fall through. */
1308 case 'U':
1309 switch (GET_CODE (x))
1311 case REG:
1312 case SUBREG:
1313 sh_print_operand (stream, x, 0);
1314 fputs (", 0", stream);
1315 break;
1317 case PLUS:
1318 sh_print_operand (stream, XEXP (x, 0), 0);
1319 fputs (", ", stream);
1320 sh_print_operand (stream, XEXP (x, 1), 0);
1321 break;
1323 default:
1324 gcc_unreachable ();
1326 break;
1328 case 'V':
1330 int num = exact_log2 (INTVAL (x));
1331 gcc_assert (num >= 0);
1332 fprintf (stream, "#%d", num);
1334 break;
1336 case 'W':
1338 int num = exact_log2 (~INTVAL (x));
1339 gcc_assert (num >= 0);
1340 fprintf (stream, "#%d", num);
1342 break;
1344 case 'd':
1345 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1347 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1348 break;
1350 case 'N':
1351 if (x == CONST0_RTX (GET_MODE (x)))
1353 fprintf ((stream), "r63");
1354 break;
1356 goto default_output;
1357 case 'u':
1358 if (CONST_INT_P (x))
1360 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1361 break;
1363 /* Fall through. */
1365 default_output:
1366 default:
1367 regno = 0;
1368 mode = GET_MODE (x);
1370 switch (GET_CODE (x))
1372 case TRUNCATE:
1374 rtx inner = XEXP (x, 0);
1375 int offset = 0;
1376 enum machine_mode inner_mode;
1378 /* We might see SUBREGs with vector mode registers inside. */
1379 if (GET_CODE (inner) == SUBREG
1380 && (GET_MODE_SIZE (GET_MODE (inner))
1381 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1382 && subreg_lowpart_p (inner))
1383 inner = SUBREG_REG (inner);
1384 if (CONST_INT_P (inner))
1386 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1387 goto default_output;
1389 inner_mode = GET_MODE (inner);
1390 if (GET_CODE (inner) == SUBREG
1391 && (GET_MODE_SIZE (GET_MODE (inner))
1392 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1393 && REG_P (SUBREG_REG (inner)))
1395 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1396 GET_MODE (SUBREG_REG (inner)),
1397 SUBREG_BYTE (inner),
1398 GET_MODE (inner));
1399 inner = SUBREG_REG (inner);
1401 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1402 abort ();
1403 /* Floating point register pairs are always big endian;
1404 general purpose registers are 64 bit wide. */
1405 regno = REGNO (inner);
1406 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1407 - HARD_REGNO_NREGS (regno, mode))
1408 + offset;
1409 x = inner;
1410 goto reg;
1412 case SIGN_EXTEND:
1413 x = XEXP (x, 0);
1414 goto reg;
1415 /* FIXME: We need this on SHmedia32 because reload generates
1416 some sign-extended HI or QI loads into DImode registers
1417 but, because Pmode is SImode, the address ends up with a
1418 subreg:SI of the DImode register. Maybe reload should be
1419 fixed so as to apply alter_subreg to such loads? */
1420 case IF_THEN_ELSE:
1421 gcc_assert (trapping_target_operand (x, VOIDmode));
1422 x = XEXP (XEXP (x, 2), 0);
1423 goto default_output;
1424 case SUBREG:
1425 gcc_assert (SUBREG_BYTE (x) == 0
1426 && REG_P (SUBREG_REG (x)));
1428 x = SUBREG_REG (x);
1429 /* Fall through. */
1431 reg:
1432 case REG:
1433 regno += REGNO (x);
1434 if (FP_REGISTER_P (regno)
1435 && mode == V16SFmode)
1436 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1437 else if (FP_REGISTER_P (REGNO (x))
1438 && mode == V4SFmode)
1439 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1440 else if (REG_P (x)
1441 && mode == V2SFmode)
1442 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1443 else if (FP_REGISTER_P (REGNO (x))
1444 && GET_MODE_SIZE (mode) > 4)
1445 fprintf ((stream), "d%s", reg_names[regno] + 1);
1446 else
1447 fputs (reg_names[regno], (stream));
1448 break;
1450 case MEM:
1451 output_address (XEXP (x, 0));
1452 break;
1454 default:
1455 if (TARGET_SH1)
1456 fputc ('#', stream);
1457 output_addr_const (stream, x);
1458 break;
1460 break;
1464 static bool
1465 sh_print_operand_punct_valid_p (unsigned char code)
1467 return (code == '.' || code == '#' || code == '@' || code == ','
1468 || code == '$' || code == '\'' || code == '>');
1471 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1473 static bool
1474 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1476 if (GET_CODE (x) == UNSPEC)
1478 switch (XINT (x, 1))
1480 case UNSPEC_DATALABEL:
1481 fputs ("datalabel ", file);
1482 output_addr_const (file, XVECEXP (x, 0, 0));
1483 break;
1484 case UNSPEC_PIC:
1485 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1486 output_addr_const (file, XVECEXP (x, 0, 0));
1487 break;
1488 case UNSPEC_GOT:
1489 output_addr_const (file, XVECEXP (x, 0, 0));
1490 fputs ("@GOT", file);
1491 break;
1492 case UNSPEC_GOTOFF:
1493 output_addr_const (file, XVECEXP (x, 0, 0));
1494 fputs ("@GOTOFF", file);
1495 break;
1496 case UNSPEC_PLT:
1497 output_addr_const (file, XVECEXP (x, 0, 0));
1498 fputs ("@PLT", file);
1499 break;
1500 case UNSPEC_GOTPLT:
1501 output_addr_const (file, XVECEXP (x, 0, 0));
1502 fputs ("@GOTPLT", file);
1503 break;
1504 case UNSPEC_DTPOFF:
1505 output_addr_const (file, XVECEXP (x, 0, 0));
1506 fputs ("@DTPOFF", file);
1507 break;
1508 case UNSPEC_GOTTPOFF:
1509 output_addr_const (file, XVECEXP (x, 0, 0));
1510 fputs ("@GOTTPOFF", file);
1511 break;
1512 case UNSPEC_TPOFF:
1513 output_addr_const (file, XVECEXP (x, 0, 0));
1514 fputs ("@TPOFF", file);
1515 break;
1516 case UNSPEC_CALLER:
1518 char name[32];
1519 /* LPCS stands for Label for PIC Call Site. */
1520 targetm.asm_out.generate_internal_label (name, "LPCS",
1521 INTVAL (XVECEXP (x, 0, 0)));
1522 assemble_name (file, name);
1524 break;
1525 case UNSPEC_EXTRACT_S16:
1526 case UNSPEC_EXTRACT_U16:
1528 rtx val, shift;
1530 val = XVECEXP (x, 0, 0);
1531 shift = XVECEXP (x, 0, 1);
1532 fputc ('(', file);
1533 if (shift != const0_rtx)
1534 fputc ('(', file);
1535 if (GET_CODE (val) == CONST
1536 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1538 fputc ('(', file);
1539 output_addr_const (file, val);
1540 fputc (')', file);
1542 else
1543 output_addr_const (file, val);
1544 if (shift != const0_rtx)
1546 fputs (" >> ", file);
1547 output_addr_const (file, shift);
1548 fputc (')', file);
1550 fputs (" & 65535)", file);
1552 break;
1553 case UNSPEC_SYMOFF:
1554 output_addr_const (file, XVECEXP (x, 0, 0));
1555 fputc ('-', file);
1556 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1558 fputc ('(', file);
1559 output_addr_const (file, XVECEXP (x, 0, 1));
1560 fputc (')', file);
1562 else
1563 output_addr_const (file, XVECEXP (x, 0, 1));
1564 break;
1565 case UNSPEC_PCREL_SYMOFF:
1566 output_addr_const (file, XVECEXP (x, 0, 0));
1567 fputs ("-(", file);
1568 output_addr_const (file, XVECEXP (x, 0, 1));
1569 fputs ("-.)", file);
1570 break;
1571 default:
1572 return false;
1574 return true;
1576 else
1577 return false;
1581 /* Encode symbol attributes of a SYMBOL_REF into its
1582 SYMBOL_REF_FLAGS. */
1583 static void
1584 sh_encode_section_info (tree decl, rtx rtl, int first)
1586 default_encode_section_info (decl, rtl, first);
1588 if (TREE_CODE (decl) == FUNCTION_DECL
1589 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1590 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1593 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1594 static void
1595 force_into (rtx value, rtx target)
1597 value = force_operand (value, target);
1598 if (! rtx_equal_p (value, target))
1599 emit_insn (gen_move_insn (target, value));
1602 /* Emit code to perform a block move. Choose the best method.
1604 OPERANDS[0] is the destination.
1605 OPERANDS[1] is the source.
1606 OPERANDS[2] is the size.
1607 OPERANDS[3] is the alignment safe to use. */
1609 bool
1610 expand_block_move (rtx *operands)
1612 int align = INTVAL (operands[3]);
1613 int constp = (CONST_INT_P (operands[2]));
1614 int bytes = (constp ? INTVAL (operands[2]) : 0);
1616 if (! constp)
1617 return false;
1619 /* If we could use mov.l to move words and dest is word-aligned, we
1620 can use movua.l for loads and still generate a relatively short
1621 and efficient sequence. */
1622 if (TARGET_SH4A_ARCH && align < 4
1623 && MEM_ALIGN (operands[0]) >= 32
1624 && can_move_by_pieces (bytes, 32))
1626 rtx dest = copy_rtx (operands[0]);
1627 rtx src = copy_rtx (operands[1]);
1628 /* We could use different pseudos for each copied word, but
1629 since movua can only load into r0, it's kind of
1630 pointless. */
1631 rtx temp = gen_reg_rtx (SImode);
1632 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1633 int copied = 0;
1635 while (copied + 4 <= bytes)
1637 rtx to = adjust_address (dest, SImode, copied);
1638 rtx from = adjust_automodify_address (src, BLKmode,
1639 src_addr, copied);
1641 set_mem_size (from, 4);
1642 emit_insn (gen_movua (temp, from));
1643 emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
1644 emit_move_insn (to, temp);
1645 copied += 4;
1648 if (copied < bytes)
1649 move_by_pieces (adjust_address (dest, BLKmode, copied),
1650 adjust_automodify_address (src, BLKmode,
1651 src_addr, copied),
1652 bytes - copied, align, 0);
1654 return true;
1657 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1658 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1659 if (align < 4 || (bytes % 4 != 0))
1660 return false;
1662 if (TARGET_HARD_SH4)
1664 if (bytes < 12)
1665 return false;
1666 else if (bytes == 12)
1668 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1669 rtx r4 = gen_rtx_REG (SImode, 4);
1670 rtx r5 = gen_rtx_REG (SImode, 5);
1672 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1673 force_into (XEXP (operands[0], 0), r4);
1674 force_into (XEXP (operands[1], 0), r5);
1675 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1676 return true;
1678 else if (! optimize_size)
1680 const char *entry_name;
1681 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1682 int dwords;
1683 rtx r4 = gen_rtx_REG (SImode, 4);
1684 rtx r5 = gen_rtx_REG (SImode, 5);
1685 rtx r6 = gen_rtx_REG (SImode, 6);
1687 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1688 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1689 force_into (XEXP (operands[0], 0), r4);
1690 force_into (XEXP (operands[1], 0), r5);
1692 dwords = bytes >> 3;
1693 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
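/* For example, a 28 byte block selects __movmem_i4_odd (28 & 4 != 0),
   with dwords = 28 >> 3 = 3 and r6 loaded with 2.  */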
1694 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1695 return true;
1697 else
1698 return false;
1700 if (bytes < 64)
1702 char entry[30];
1703 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1704 rtx r4 = gen_rtx_REG (SImode, 4);
1705 rtx r5 = gen_rtx_REG (SImode, 5);
1707 sprintf (entry, "__movmemSI%d", bytes);
1708 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1709 force_into (XEXP (operands[0], 0), r4);
1710 force_into (XEXP (operands[1], 0), r5);
1711 emit_insn (gen_block_move_real (func_addr_rtx));
1712 return true;
1715 /* This is the same number of bytes as a memcpy call, but to a different,
1716 less common function name, so this will occasionally use more space. */
1717 if (! optimize_size)
1719 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1720 int final_switch, while_loop;
1721 rtx r4 = gen_rtx_REG (SImode, 4);
1722 rtx r5 = gen_rtx_REG (SImode, 5);
1723 rtx r6 = gen_rtx_REG (SImode, 6);
1725 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1726 force_into (XEXP (operands[0], 0), r4);
1727 force_into (XEXP (operands[1], 0), r5);
1729 /* r6 controls the size of the move. 16 is decremented from it
1730 for each 64 bytes moved. Then the negative bit left over is used
1731 as an index into a list of move instructions. e.g., a 72 byte move
1732 would be set up with size(r6) = 14, for one iteration through the
1733 big while loop, and a switch of -2 for the last part. */
1735 final_switch = 16 - ((bytes / 4) % 16);
1736 while_loop = ((bytes / 4) / 16 - 1) * 16;
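/* Checking the 72 byte example from the comment above: 72 / 4 = 18
   words, so final_switch = 16 - (18 % 16) = 14 and
   while_loop = (18 / 16 - 1) * 16 = 0, i.e. r6 is loaded with 14.  */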
1737 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1738 emit_insn (gen_block_lump_real (func_addr_rtx));
1739 return true;
1742 return false;
1745 /* Prepare operands for a move define_expand; specifically, one of the
1746 operands must be in a register. */
1748 void
1749 prepare_move_operands (rtx operands[], enum machine_mode mode)
1751 if ((mode == SImode || mode == DImode)
1752 && flag_pic
1753 && ! ((mode == Pmode || mode == ptr_mode)
1754 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1756 rtx temp;
1757 if (SYMBOLIC_CONST_P (operands[1]))
1759 if (MEM_P (operands[0]))
1760 operands[1] = force_reg (Pmode, operands[1]);
1761 else if (TARGET_SHMEDIA
1762 && GET_CODE (operands[1]) == LABEL_REF
1763 && target_reg_operand (operands[0], mode))
1764 /* It's ok. */;
1765 else
1767 temp = (!can_create_pseudo_p ()
1768 ? operands[0]
1769 : gen_reg_rtx (Pmode));
1770 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1773 else if (GET_CODE (operands[1]) == CONST
1774 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1775 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1777 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1778 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1779 mode, temp);
1780 operands[1] = expand_binop (mode, add_optab, temp,
1781 XEXP (XEXP (operands[1], 0), 1),
1782 (!can_create_pseudo_p ()
1783 ? temp
1784 : gen_reg_rtx (Pmode)),
1785 0, OPTAB_LIB_WIDEN);
1789 if (! reload_in_progress && ! reload_completed)
1791 /* Copy the source to a register if neither operand is a register. */
1792 if (! register_operand (operands[0], mode)
1793 && ! sh_register_operand (operands[1], mode))
1794 operands[1] = copy_to_mode_reg (mode, operands[1]);
1796 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1798 /* This is like change_address_1 (operands[0], mode, 0, 1),
1799 except that we can't use that function because it is static. */
1800 rtx new_rtx = change_address (operands[0], mode, 0);
1801 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1802 operands[0] = new_rtx;
1805 /* This case can happen while generating code to move the result
1806 of a library call to the target. Reject `st r0,@(rX,rY)' because
1807 reload will fail to find a spill register for rX, since r0 is already
1808 being used for the source. */
1809 else if (TARGET_SH1
1810 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1811 && MEM_P (operands[0])
1812 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1813 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1814 operands[1] = copy_to_mode_reg (mode, operands[1]);
1817 if (mode == Pmode || mode == ptr_mode)
1819 rtx op0, op1, opc;
1820 enum tls_model tls_kind;
1822 op0 = operands[0];
1823 op1 = operands[1];
1824 if (GET_CODE (op1) == CONST
1825 && GET_CODE (XEXP (op1, 0)) == PLUS
1826 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1827 != TLS_MODEL_NONE))
1829 opc = XEXP (XEXP (op1, 0), 1);
1830 op1 = XEXP (XEXP (op1, 0), 0);
1832 else
1833 opc = NULL_RTX;
1835 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1837 rtx tga_op1, tga_ret, tmp, tmp2;
1839 if (! flag_pic
1840 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1841 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1842 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1844 /* Don't schedule insns for getting GOT address when
1845 the first scheduling pass is enabled, to avoid spill
1846 failures for R0. */
1847 if (flag_schedule_insns)
1848 emit_insn (gen_blockage ());
1849 emit_insn (gen_GOTaddr2picreg ());
1850 emit_use (gen_rtx_REG (SImode, PIC_REG));
1851 if (flag_schedule_insns)
1852 emit_insn (gen_blockage ());
1855 switch (tls_kind)
1857 case TLS_MODEL_GLOBAL_DYNAMIC:
1858 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1859 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1860 tmp = gen_reg_rtx (Pmode);
1861 emit_move_insn (tmp, tga_ret);
1862 op1 = tmp;
1863 break;
1865 case TLS_MODEL_LOCAL_DYNAMIC:
1866 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1867 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1869 tmp = gen_reg_rtx (Pmode);
1870 emit_move_insn (tmp, tga_ret);
1872 if (register_operand (op0, Pmode))
1873 tmp2 = op0;
1874 else
1875 tmp2 = gen_reg_rtx (Pmode);
1877 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1878 op1 = tmp2;
1879 break;
1881 case TLS_MODEL_INITIAL_EXEC:
1882 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1883 tmp = gen_sym2GOTTPOFF (op1);
1884 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1885 op1 = tga_op1;
1886 break;
1888 case TLS_MODEL_LOCAL_EXEC:
1889 tmp2 = gen_reg_rtx (Pmode);
1890 emit_insn (gen_store_gbr (tmp2));
1891 tmp = gen_reg_rtx (Pmode);
1892 emit_insn (gen_symTPOFF2reg (tmp, op1));
1894 if (register_operand (op0, Pmode))
1895 op1 = op0;
1896 else
1897 op1 = gen_reg_rtx (Pmode);
1899 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1900 break;
1902 default:
1903 gcc_unreachable ();
1905 if (opc)
1906 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1907 operands[1] = op1;
1912 /* Implement the CANONICALIZE_COMPARISON macro for the combine pass.
1913 This function is also re-used to canonicalize comparisons in cbranch
1914 pattern expanders. */
1915 void
1916 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1917 enum machine_mode mode)
1919 /* When invoked from within the combine pass the mode is not specified,
1920 so try to get it from one of the operands. */
1921 if (mode == VOIDmode)
1922 mode = GET_MODE (op0);
1923 if (mode == VOIDmode)
1924 mode = GET_MODE (op1);
1926 // We need to have a mode to do something useful here.
1927 if (mode == VOIDmode)
1928 return;
1930 // Currently, we don't deal with floats here.
1931 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1932 return;
1934 // Make sure that the constant operand is the second operand.
1935 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1937 std::swap (op0, op1);
1938 cmp = swap_condition (cmp);
1941 if (CONST_INT_P (op1))
1943 /* Try to adjust the constant operand in such a way that available
1944 comparison insns can be utilized better and the constant can be
1945 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1946 constant pool. */
1947 const HOST_WIDE_INT val = INTVAL (op1);
1949 /* x > -1 --> x >= 0
1950 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1951 x <= -1 --> x < 0
1952 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1953 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1955 cmp = cmp == GT ? GE : LT;
1956 op1 = gen_int_mode (val + 1, mode);
1959 /* x >= 1 --> x > 0
1960 x >= 0x80 --> x > 0x7F
1961 x < 1 --> x <= 0
1962 x < 0x80 --> x <= 0x7F */
1963 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1965 cmp = cmp == GE ? GT : LE;
1966 op1 = gen_int_mode (val - 1, mode);
1969 /* unsigned x >= 1 --> x != 0
1970 unsigned x < 1 --> x == 0 */
1971 else if (val == 1 && (cmp == GEU || cmp == LTU))
1973 cmp = cmp == GEU ? NE : EQ;
1974 op1 = CONST0_RTX (mode);
1977 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1978 unsigned x < 0x80 --> unsigned x < 0x7F */
1979 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1981 cmp = cmp == GEU ? GTU : LEU;
1982 op1 = gen_int_mode (val - 1, mode);
1985 /* unsigned x > 0 --> x != 0
1986 unsigned x <= 0 --> x == 0 */
1987 else if (val == 0 && (cmp == GTU || cmp == LEU))
1988 cmp = cmp == GTU ? NE : EQ;
1990 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1991 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1992 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1993 && val == 0x7FFFFFFF)
1995 cmp = cmp == GTU ? LT : GE;
1996 op1 = const0_rtx;
1999 /* unsigned x >= 0x80000000 --> signed x < 0
2000 unsigned x < 0x80000000 --> signed x >= 0 */
2001 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2002 && (unsigned HOST_WIDE_INT)val
2003 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2005 cmp = cmp == GEU ? LT : GE;
2006 op1 = const0_rtx;
2011 enum rtx_code
2012 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
2013 enum rtx_code comparison)
2015 /* The scratch reg is only available when this is invoked from within
2016 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2017 rtx scratch = NULL_RTX;
2019 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2020 comparison = GET_CODE (operands[0]);
2021 else
2022 scratch = operands[4];
2024 sh_canonicalize_comparison (comparison, operands[1], operands[2], mode);
2026 /* Notice that this function is also invoked after reload by
2027 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2028 rtx op1 = operands[1];
2030 if (can_create_pseudo_p ())
2031 operands[1] = force_reg (mode, op1);
2032 /* When we are handling DImode comparisons, we want to keep constants so
2033 that we can optimize the component comparisons; however, memory loads
2034 are better issued as a whole so that they can be scheduled well.
2035 SImode equality comparisons allow I08 constants, but only when they
2036 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2037 into a register, that register might as well be r0, and we allow the
2038 constant. If it is already in a register, this is likely to be
2039 allocated to a different hard register, thus we load the constant into
2040 a register unless it is zero. */
2041 if (!REG_P (operands[2])
2042 && (!CONST_INT_P (operands[2])
2043 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2044 && ((comparison != EQ && comparison != NE)
2045 || (REG_P (op1) && REGNO (op1) != R0_REG)
2046 || !satisfies_constraint_I08 (operands[2])))))
2048 if (scratch && GET_MODE (scratch) == mode)
2050 emit_move_insn (scratch, operands[2]);
2051 operands[2] = scratch;
2053 else if (can_create_pseudo_p ())
2054 operands[2] = force_reg (mode, operands[2]);
2056 return comparison;
2059 void
2060 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2062 rtx (*branch_expander) (rtx) = gen_branch_true;
2063 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2064 switch (comparison)
2066 case NE: case LT: case LE: case LTU: case LEU:
2067 comparison = reverse_condition (comparison);
2068 branch_expander = gen_branch_false;
2069 default: ;
2071 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2072 gen_rtx_fmt_ee (comparison, SImode,
2073 operands[1], operands[2])));
2074 rtx jump = emit_jump_insn (branch_expander (operands[3]));
2075 if (probability >= 0)
2076 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
2079 /* ??? How should we distribute probabilities when more than one branch
2080 is generated? So far we only have some ad-hoc observations:
2081 - If the operands are random, they are likely to differ in both parts.
2082 - If comparing items in a hash chain, the operands are random or equal;
2083 operation should be EQ or NE.
2084 - If items are searched in an ordered tree from the root, we can expect
2085 the highpart to be unequal about half of the time; operation should be
2086 an inequality comparison, operands non-constant, and overall probability
2087 about 50%. Likewise for quicksort.
2088 - Range checks will often be made against constants. Even if we assume for
2089 simplicity an even distribution of the non-constant operand over a
2090 sub-range here, the same probability could be generated with differently
2091 wide sub-ranges - as long as the ratio of the part of the subrange that
2092 is before the threshold to the part that comes after the threshold stays
2093 the same. Thus, we can't really tell anything here;
2094 assuming random distribution is at least simple.
2097 bool
2098 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2100 enum rtx_code msw_taken, msw_skip, lsw_taken;
2101 rtx skip_label = NULL_RTX;
2102 rtx op1h, op1l, op2h, op2l;
2103 int num_branches;
2104 int prob, rev_prob;
2105 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2106 rtx scratch = operands[4];
2108 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2109 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2110 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2111 op1l = gen_lowpart (SImode, operands[1]);
2112 op2l = gen_lowpart (SImode, operands[2]);
2113 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2114 prob = split_branch_probability;
2115 rev_prob = REG_BR_PROB_BASE - prob;
2116 switch (comparison)
2118 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2119 That costs 1 cycle more when the first branch can be predicted taken,
2120 but saves us mispredicts because only one branch needs prediction.
2121 It also enables generating the cmpeqdi_t-1 pattern. */
2122 case EQ:
2123 if (TARGET_CMPEQDI_T)
2125 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2126 emit_jump_insn (gen_branch_true (operands[3]));
2127 return true;
2129 msw_skip = NE;
2130 lsw_taken = EQ;
2131 if (prob >= 0)
2133 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2134 msw_skip_prob = rev_prob;
2135 if (REG_BR_PROB_BASE <= 65535)
2136 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2137 else
2139 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
2140 lsw_taken_prob
2141 = (prob
2142 ? (REG_BR_PROB_BASE
2143 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
2144 / ((HOST_WIDEST_INT) prob << 32)))
2145 : 0);
2148 break;
2149 case NE:
2150 if (TARGET_CMPEQDI_T)
2152 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2153 emit_jump_insn (gen_branch_false (operands[3]));
2154 return true;
2156 msw_taken = NE;
2157 msw_taken_prob = prob;
2158 lsw_taken = NE;
2159 lsw_taken_prob = 0;
2160 break;
2161 case GTU: case GT:
2162 msw_taken = comparison;
2163 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2164 break;
2165 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2166 msw_skip = swap_condition (msw_taken);
2167 lsw_taken = GTU;
2168 break;
2169 case GEU: case GE:
2170 if (op2l == CONST0_RTX (SImode))
2171 msw_taken = comparison;
2172 else
2174 msw_taken = comparison == GE ? GT : GTU;
2175 msw_skip = swap_condition (msw_taken);
2176 lsw_taken = GEU;
2178 break;
2179 case LTU: case LT:
2180 msw_taken = comparison;
2181 if (op2l == CONST0_RTX (SImode))
2182 break;
2183 msw_skip = swap_condition (msw_taken);
2184 lsw_taken = LTU;
2185 break;
2186 case LEU: case LE:
2187 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2188 msw_taken = comparison;
2189 else
2191 lsw_taken = LEU;
2192 if (comparison == LE)
2193 msw_taken = LT;
2194 else if (op2h != CONST0_RTX (SImode))
2195 msw_taken = LTU;
2196 else
2198 msw_skip = swap_condition (LTU);
2199 break;
2201 msw_skip = swap_condition (msw_taken);
2203 break;
2204 default: return false;
2206 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2207 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2208 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2209 if (comparison != EQ && comparison != NE && num_branches > 1)
2211 if (!CONSTANT_P (operands[2])
2212 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2213 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2215 msw_taken_prob = prob / 2U;
2216 msw_skip_prob
2217 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2218 lsw_taken_prob = prob;
2220 else
2222 msw_taken_prob = prob;
2223 msw_skip_prob = REG_BR_PROB_BASE;
2224 /* ??? If we have a constant op2h, should we use that when
2225 calculating lsw_taken_prob? */
2226 lsw_taken_prob = prob;
2229 operands[1] = op1h;
2230 operands[2] = op2h;
2231 operands[4] = NULL_RTX;
2232 if (reload_completed
2233 && ! arith_reg_or_0_operand (op2h, SImode)
2234 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2235 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2236 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2238 emit_move_insn (scratch, operands[2]);
2239 operands[2] = scratch;
2241 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2242 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2243 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2245 rtx taken_label = operands[3];
2247 /* Operands were possibly modified, but msw_skip doesn't expect this.
2248 Always use the original ones. */
2249 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2251 operands[1] = op1h;
2252 operands[2] = op2h;
2253 if (reload_completed
2254 && ! arith_reg_or_0_operand (op2h, SImode)
2255 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2257 emit_move_insn (scratch, operands[2]);
2258 operands[2] = scratch;
2262 operands[3] = skip_label = gen_label_rtx ();
2263 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2264 operands[3] = taken_label;
2266 operands[1] = op1l;
2267 operands[2] = op2l;
2268 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2270 if (reload_completed
2271 && ! arith_reg_or_0_operand (op2l, SImode)
2272 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2274 emit_move_insn (scratch, operands[2]);
2275 operands[2] = scratch;
2277 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2279 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2280 emit_label (skip_label);
2281 return true;
2284 /* Given an operand, return 1 if the evaluated operand plugged into an
2285 if_then_else will result in a branch_true, 0 if branch_false, or
2286 -1 if neither applies. The truth table goes like this:
2288 op | cmpval | code | result
2289 ---------+--------+---------+--------------------
2290 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2291 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2292 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2293 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2294 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2295 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2296 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2297 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2299 sh_eval_treg_value (rtx op)
2301 enum rtx_code code = GET_CODE (op);
2302 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2303 return -1;
2305 int cmpop = code == EQ ? 1 : 0;
2306 int cmpval = INTVAL (XEXP (op, 1));
2307 if (cmpval != 0 && cmpval != 1)
2308 return -1;
2310 int t;
2311 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2312 t = 0;
2313 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2314 t = 1;
2315 else
2316 return -1;
2318 return t ^ (cmpval == cmpop);
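/* For example, (eq (reg T) (const_int 1)) gives cmpop = 1, cmpval = 1
   and t = 0, so the result is 0 ^ (1 == 1) = 1, i.e. branch_true,
   matching the second row of the table above.  */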
2321 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2323 static void
2324 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2326 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2328 insn = gen_rtx_PARALLEL (VOIDmode,
2329 gen_rtvec (2, insn,
2330 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2331 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2333 else
2334 emit_insn (insn);
2337 /* Prepare the operands for an scc instruction; make sure that the
2338 compare has been done and the result is in T_REG. */
2339 void
2340 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2342 rtx t_reg = get_t_reg_rtx ();
2343 enum rtx_code oldcode = code;
2344 enum machine_mode mode;
2346 /* First need a compare insn. */
2347 switch (code)
2349 case NE:
2350 /* It isn't possible to handle this case. */
2351 gcc_unreachable ();
2352 case LT:
2353 code = GT;
2354 break;
2355 case LE:
2356 code = GE;
2357 break;
2358 case LTU:
2359 code = GTU;
2360 break;
2361 case LEU:
2362 code = GEU;
2363 break;
2364 default:
2365 break;
2367 if (code != oldcode)
2369 rtx tmp = op0;
2370 op0 = op1;
2371 op1 = tmp;
2374 mode = GET_MODE (op0);
2375 if (mode == VOIDmode)
2376 mode = GET_MODE (op1);
2378 op0 = force_reg (mode, op0);
2379 if ((code != EQ && code != NE
2380 && (op1 != const0_rtx
2381 || code == GTU || code == GEU || code == LTU || code == LEU))
2382 || (mode == DImode && op1 != const0_rtx)
2383 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2384 op1 = force_reg (mode, op1);
2386 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2387 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2388 mode);
2392 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2393 rtx op0, rtx op1)
2395 rtx target = gen_reg_rtx (SImode);
2396 rtx tmp;
2398 gcc_assert (TARGET_SHMEDIA);
2399 switch (code)
2401 case EQ:
2402 case GT:
2403 case LT:
2404 case UNORDERED:
2405 case GTU:
2406 case LTU:
2407 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2408 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2409 code = NE;
2410 break;
2412 case NE:
2413 case GE:
2414 case LE:
2415 case ORDERED:
2416 case GEU:
2417 case LEU:
2418 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2419 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2420 code = EQ;
2421 break;
2423 case UNEQ:
2424 case UNGE:
2425 case UNGT:
2426 case UNLE:
2427 case UNLT:
2428 case LTGT:
2429 return NULL_RTX;
2431 default:
2432 gcc_unreachable ();
2435 if (mode == DImode)
2437 rtx t2 = gen_reg_rtx (DImode);
2438 emit_insn (gen_extendsidi2 (t2, target));
2439 target = t2;
2442 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2445 /* Called from the md file, set up the operands of a compare instruction. */
2447 void
2448 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2450 enum rtx_code code = GET_CODE (operands[0]);
2451 enum rtx_code branch_code;
2452 rtx op0 = operands[1];
2453 rtx op1 = operands[2];
2454 rtx insn, tem;
2455 bool need_ccmpeq = false;
2457 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2459 op0 = force_reg (mode, op0);
2460 op1 = force_reg (mode, op1);
2462 else
2464 if (code != EQ || mode == DImode)
2466 /* Force args into regs, since we can't use constants here. */
2467 op0 = force_reg (mode, op0);
2468 if (op1 != const0_rtx || code == GTU || code == GEU)
2469 op1 = force_reg (mode, op1);
2473 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2475 if (code == LT
2476 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2477 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2479 tem = op0, op0 = op1, op1 = tem;
2480 code = swap_condition (code);
2483 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2484 if (code == GE)
2486 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2487 need_ccmpeq = true;
2488 code = GT;
2491 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2492 to EQ/GT respectively. */
2493 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2496 switch (code)
2498 case EQ:
2499 case GT:
2500 case GE:
2501 case GTU:
2502 case GEU:
2503 branch_code = code;
2504 break;
2505 case NE:
2506 case LT:
2507 case LE:
2508 case LTU:
2509 case LEU:
2510 branch_code = reverse_condition (code);
2511 break;
2512 default:
2513 gcc_unreachable ();
2516 insn = gen_rtx_SET (VOIDmode,
2517 get_t_reg_rtx (),
2518 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2520 sh_emit_set_t_insn (insn, mode);
2521 if (need_ccmpeq)
2522 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2524 if (branch_code == code)
2525 emit_jump_insn (gen_branch_true (operands[3]));
2526 else
2527 emit_jump_insn (gen_branch_false (operands[3]));
2530 void
2531 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2533 enum rtx_code code = GET_CODE (operands[1]);
2534 rtx op0 = operands[2];
2535 rtx op1 = operands[3];
2536 rtx lab = NULL_RTX;
2537 bool invert = false;
2538 rtx tem;
2540 op0 = force_reg (mode, op0);
2541 if ((code != EQ && code != NE
2542 && (op1 != const0_rtx
2543 || code == GTU || code == GEU || code == LTU || code == LEU))
2544 || (mode == DImode && op1 != const0_rtx)
2545 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2546 op1 = force_reg (mode, op1);
2548 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2550 if (code == LT || code == LE)
2552 code = swap_condition (code);
2553 tem = op0, op0 = op1, op1 = tem;
2555 if (code == GE)
2557 if (TARGET_IEEE)
2559 lab = gen_label_rtx ();
2560 sh_emit_scc_to_t (EQ, op0, op1);
2561 emit_jump_insn (gen_branch_true (lab));
2562 code = GT;
2564 else
2566 code = LT;
2567 invert = true;
2572 if (code == NE)
2574 code = EQ;
2575 invert = true;
2578 sh_emit_scc_to_t (code, op0, op1);
2579 if (lab)
2580 emit_label (lab);
2581 if (invert)
2582 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2583 else
2584 emit_move_insn (operands[0], get_t_reg_rtx ());
2587 /* Functions to output assembly code. */
2589 /* Return a sequence of instructions to perform DI or DF move.
2591 Since the SH cannot move a DI or DF in one instruction, we have
2592 to take care when we see overlapping source and dest registers. */
2594 const char *
2595 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2596 enum machine_mode mode)
2598 rtx dst = operands[0];
2599 rtx src = operands[1];
2601 if (MEM_P (dst)
2602 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2603 return "mov.l %T1,%0\n\tmov.l %1,%0";
2605 if (register_operand (dst, mode)
2606 && register_operand (src, mode))
2608 if (REGNO (src) == MACH_REG)
2609 return "sts mach,%S0\n\tsts macl,%R0";
2611 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2612 when mov.d r1,r0 do r1->r0 then r2->r1. */
2614 if (REGNO (src) + 1 == REGNO (dst))
2615 return "mov %T1,%T0\n\tmov %1,%0";
2616 else
2617 return "mov %1,%0\n\tmov %T1,%T0";
2619 else if (CONST_INT_P (src))
2621 if (INTVAL (src) < 0)
2622 output_asm_insn ("mov #-1,%S0", operands);
2623 else
2624 output_asm_insn ("mov #0,%S0", operands);
2626 return "mov %1,%R0";
2628 else if (MEM_P (src))
2630 int ptrreg = -1;
2631 int dreg = REGNO (dst);
2632 rtx inside = XEXP (src, 0);
2634 switch (GET_CODE (inside))
2636 case REG:
2637 ptrreg = REGNO (inside);
2638 break;
2640 case SUBREG:
2641 ptrreg = subreg_regno (inside);
2642 break;
2644 case PLUS:
2645 ptrreg = REGNO (XEXP (inside, 0));
2646 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2647 an offsettable address. Unfortunately, offsettable addresses use
2648 QImode to check the offset, and a QImode offsettable address
2649 requires r0 for the other operand, which is not currently
2650 supported, so we can't use the 'o' constraint.
2651 Thus we must check for and handle r0+REG addresses here.
2652 We punt for now, since this is likely very rare. */
2653 gcc_assert (!REG_P (XEXP (inside, 1)));
2654 break;
2656 case LABEL_REF:
2657 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
2658 case POST_INC:
2659 return "mov.l %1,%0\n\tmov.l %1,%T0";
2660 default:
2661 gcc_unreachable ();
2664 /* Work out the safe way to copy. Copy into the second half first. */
2665 if (dreg == ptrreg)
2666 return "mov.l %T1,%T0\n\tmov.l %1,%0";
2669 return "mov.l %1,%0\n\tmov.l %T1,%T0";
2672 /* Print an instruction which would have gone into a delay slot after
2673 another instruction, but couldn't because the other instruction expanded
2674 into a sequence where putting the slot insn at the end wouldn't work. */
2676 static void
2677 print_slot (rtx insn)
2679 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2681 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2684 const char *
2685 output_far_jump (rtx insn, rtx op)
2687 struct { rtx lab, reg, op; } this_jmp;
2688 rtx braf_base_lab = NULL_RTX;
2689 const char *jump;
2690 int far;
2691 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2692 rtx prev;
2694 this_jmp.lab = gen_label_rtx ();
2696 if (TARGET_SH2
2697 && offset >= -32764
2698 && offset - get_attr_length (insn) <= 32766)
2700 far = 0;
2701 jump = "mov.w %O0,%1; braf %1";
2703 else
2705 far = 1;
2706 if (flag_pic)
2708 if (TARGET_SH2)
2709 jump = "mov.l %O0,%1; braf %1";
2710 else
2711 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
2713 else
2714 jump = "mov.l %O0,%1; jmp @%1";
2716 /* If we have a scratch register available, use it. */
2717 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2718 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2720 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2721 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2722 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
2723 output_asm_insn (jump, &this_jmp.lab);
2724 if (dbr_sequence_length ())
2725 print_slot (final_sequence);
2726 else
2727 output_asm_insn ("nop", 0);
2729 else
2731 /* Output the delay slot insn first if any. */
2732 if (dbr_sequence_length ())
2733 print_slot (final_sequence);
2735 this_jmp.reg = gen_rtx_REG (SImode, 13);
2736 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2737 Fortunately, MACL is fixed and call-clobbered, and we never
2738 need its value across jumps, so save r13 in it instead of in
2739 the stack. */
2740 if (TARGET_SH5)
2741 output_asm_insn ("lds r13, macl", 0);
2742 else
2743 output_asm_insn ("mov.l r13,@-r15", 0);
2744 output_asm_insn (jump, &this_jmp.lab);
2745 if (TARGET_SH5)
2746 output_asm_insn ("sts macl, r13", 0);
2747 else
2748 output_asm_insn ("mov.l @r15+,r13", 0);
2750 if (far && flag_pic && TARGET_SH2)
2752 braf_base_lab = gen_label_rtx ();
2753 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2754 CODE_LABEL_NUMBER (braf_base_lab));
2756 if (far)
2757 output_asm_insn (".align 2", 0);
2758 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2759 this_jmp.op = op;
2760 if (far && flag_pic)
2762 if (TARGET_SH2)
2763 this_jmp.lab = braf_base_lab;
2764 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2766 else
2767 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2768 return "";
2771 /* Local label counter, used for constants in the pool and inside
2772 pattern branches. */
2774 static int lf = 100;
2776 /* Output code for ordinary branches. */
2778 const char *
2779 output_branch (int logic, rtx insn, rtx *operands)
2781 switch (get_attr_length (insn))
2783 case 6:
2784 /* This can happen if filling the delay slot has caused a forward
2785 branch to exceed its range (we could reverse it, but only
2786 when we know we won't overextend other branches; this should
2787 best be handled by relaxation).
2788 It can also happen when other condbranches hoist delay slot insns
2789 from their destination, thus leading to code size increase.
2790 But the branch will still be in the range -4092..+4098 bytes. */
2792 if (! TARGET_RELAX)
2794 int label = lf++;
2795 /* The call to print_slot will clobber the operands. */
2796 rtx op0 = operands[0];
2798 /* If the instruction in the delay slot is annulled (true), then
2799 there is no delay slot where we can put it now. The only safe
2800 place for it is after the label. final will do that by default. */
2802 if (final_sequence
2803 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2804 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2806 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2807 ASSEMBLER_DIALECT ? "/" : ".", label);
2808 print_slot (final_sequence);
2810 else
2811 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2813 output_asm_insn ("bra\t%l0", &op0);
2814 fprintf (asm_out_file, "\tnop\n");
2815 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2817 return "";
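/* The sequence emitted above for an out-of-range conditional branch is
   thus an inverted short branch (bf or bt, possibly with the delay slot
   insn) to a local LF label, followed by "bra target" and a nop, with
   the LF label placed after the nop.  */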
2819 /* When relaxing, handle this like a short branch. The linker
2820 will fix it up if it still doesn't fit after relaxation. */
2821 case 2:
2822 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2824 /* These are for SH2e, in which we have to account for the
2825 extra nop because of the hardware bug in annulled branches. */
2826 case 8:
2827 if (! TARGET_RELAX)
2829 int label = lf++;
2831 gcc_assert (!final_sequence
2832 || !(INSN_ANNULLED_BRANCH_P
2833 (XVECEXP (final_sequence, 0, 0))));
2834 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2835 logic ? "f" : "t",
2836 ASSEMBLER_DIALECT ? "/" : ".", label);
2837 fprintf (asm_out_file, "\tnop\n");
2838 output_asm_insn ("bra\t%l0", operands);
2839 fprintf (asm_out_file, "\tnop\n");
2840 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2842 return "";
2844 /* When relaxing, fall through. */
2845 case 4:
2847 char buffer[10];
2849 sprintf (buffer, "b%s%ss\t%%l0",
2850 logic ? "t" : "f",
2851 ASSEMBLER_DIALECT ? "/" : ".");
2852 output_asm_insn (buffer, &operands[0]);
2853 return "nop";
2856 default:
2857 /* There should be no longer branches now - that would
2858 indicate that something has destroyed the branches set
2859 up in machine_dependent_reorg. */
2860 gcc_unreachable ();
2864 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2865 fill in operands 9 as a label to the successor insn.
2866 We try to use jump threading where possible.
2867 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2868 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2869 follow jmp and bt, if the address is in range. */
2870 const char *
2871 output_branchy_insn (enum rtx_code code, const char *templ,
2872 rtx insn, rtx *operands)
2874 rtx next_insn = NEXT_INSN (insn);
2876 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2878 rtx src = SET_SRC (PATTERN (next_insn));
2879 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2881 /* Following branch not taken */
2882 operands[9] = gen_label_rtx ();
2883 emit_label_after (operands[9], next_insn);
2884 INSN_ADDRESSES_NEW (operands[9],
2885 INSN_ADDRESSES (INSN_UID (next_insn))
2886 + get_attr_length (next_insn));
2887 return templ;
2889 else
2891 int offset = (branch_dest (next_insn)
2892 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2893 if (offset >= -252 && offset <= 258)
2895 if (GET_CODE (src) == IF_THEN_ELSE)
2896 /* branch_true */
2897 src = XEXP (src, 1);
2898 operands[9] = src;
2899 return templ;
2903 operands[9] = gen_label_rtx ();
2904 emit_label_after (operands[9], insn);
2905 INSN_ADDRESSES_NEW (operands[9],
2906 INSN_ADDRESSES (INSN_UID (insn))
2907 + get_attr_length (insn));
2908 return templ;
2911 const char *
2912 output_ieee_ccmpeq (rtx insn, rtx *operands)
2914 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
2915 insn, operands);
2918 /* Output the start of the assembler file. */
2920 static void
2921 sh_file_start (void)
2923 default_file_start ();
2925 if (TARGET_ELF)
2926 /* We need to show the text section with the proper
2927 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2928 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2929 will complain. We can teach GAS specifically about the
2930 default attributes for our choice of text section, but
2931 then we would have to change GAS again if/when we change
2932 the text section name. */
2933 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2934 else
2935 /* Switch to the data section so that the coffsem symbol
2936 isn't in the text section. */
2937 switch_to_section (data_section);
2939 if (TARGET_LITTLE_ENDIAN)
2940 fputs ("\t.little\n", asm_out_file);
2942 if (!TARGET_ELF)
2944 if (TARGET_SHCOMPACT)
2945 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2946 else if (TARGET_SHMEDIA)
2947 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2948 TARGET_SHMEDIA64 ? 64 : 32);
2952 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2954 static bool
2955 unspec_caller_rtx_p (rtx pat)
2957 rtx base, offset;
2958 int i;
2960 split_const (pat, &base, &offset);
2961 if (GET_CODE (base) == UNSPEC)
2963 if (XINT (base, 1) == UNSPEC_CALLER)
2964 return true;
2965 for (i = 0; i < XVECLEN (base, 0); i++)
2966 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2967 return true;
2969 return false;
2972 /* Indicate that INSN cannot be duplicated. This is true for an insn
2973 that generates a unique label. */
2975 static bool
2976 sh_cannot_copy_insn_p (rtx insn)
2978 rtx pat;
2980 if (!reload_completed || !flag_pic)
2981 return false;
2983 if (!NONJUMP_INSN_P (insn))
2984 return false;
2985 if (asm_noperands (insn) >= 0)
2986 return false;
2988 pat = PATTERN (insn);
2989 if (GET_CODE (pat) != SET)
2990 return false;
2991 pat = SET_SRC (pat);
2993 if (unspec_caller_rtx_p (pat))
2994 return true;
2996 return false;
2999 /* Number of instructions used to make an arithmetic right shift by N. */
3000 static const char ashiftrt_insns[] =
3001 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3003 /* Description of a logical left or right shift, when expanded to a sequence
3004 of 1/2/8/16 shifts.
3005 Notice that one bit right shifts clobber the T bit. One bit left shifts
3006 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
3007 enum
3009 ASHL_CLOBBERS_T = 1 << 0,
3010 LSHR_CLOBBERS_T = 1 << 1
3013 struct ashl_lshr_sequence
3015 char insn_count;
3016 char amount[6];
3017 char clobbers_t;
3020 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3022 { 0, { 0 }, 0 }, // 0
3023 { 1, { 1 }, LSHR_CLOBBERS_T },
3024 { 1, { 2 }, 0 },
3025 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3026 { 2, { 2, 2 }, 0 }, // 4
3027 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3028 { 3, { 2, 2, 2 }, 0 },
3029 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3030 { 1, { 8 }, 0 }, // 8
3031 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3032 { 2, { 8, 2 }, 0 },
3033 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3034 { 3, { 8, 2, 2 }, 0 }, // 12
3035 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3036 { 3, { 8, -2, 8 }, 0 },
3037 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3038 { 1, { 16 }, 0 }, // 16
3039 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3040 { 2, { 16, 2 }, 0 },
3041 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3042 { 3, { 16, 2, 2 }, 0 }, // 20
3043 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3044 { 3, { 16, -2, 8 }, 0 },
3045 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3046 { 2, { 16, 8 }, 0 }, // 24
3047 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3048 { 3, { 16, 8, 2 }, 0 },
3049 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3050 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3051 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3052 { 3, { 16, -2, 16 }, 0 },
3054 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3055 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3056 However, the shift-and combiner code needs this entry here to be in
3057 terms of real shift insns. */
3058 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
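/* Reading the table: e.g. entry 10 is { 2, { 8, 2 }, 0 }, i.e. a shift
   by 10 is done as a shift by 8 followed by a shift by 2 in two insns
   without touching the T bit; a negative amount means a shift in the
   opposite direction.  */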
3061 /* Individual shift amounts for shift amounts < 16, where up to the three
3062 highmost bits might be clobbered. This is typically used when combined with some
3063 kind of sign or zero extension. */
3064 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3066 { 0, { 0 }, 0 }, // 0
3067 { 1, { 1 }, LSHR_CLOBBERS_T },
3068 { 1, { 2 }, 0 },
3069 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3070 { 2, { 2, 2 }, 0 }, // 4
3071 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3072 { 2, { 8, -2 }, 0 },
3073 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3074 { 1, { 8 }, 0 }, // 8
3075 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3076 { 2, { 8, 2 }, 0 },
3077 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3078 { 3, { 8, 2, 2 }, 0 }, // 12
3079 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3080 { 2, { 16, -2 }, 0 },
3081 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3082 { 1, { 16 }, 0 }, // 16
3083 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3084 { 2, { 16, 2 }, 0 },
3085 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3086 { 3, { 16, 2, 2 }, 0 }, // 20
3087 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3088 { 3, { 16, -2, 8 }, 0 },
3089 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3090 { 2, { 16, 8 }, 0 }, // 24
3091 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3092 { 3, { 16, 8, 2 }, 0 },
3093 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3094 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3095 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3096 { 3, { 16, -2, 16 }, 0 },
3097 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3100 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3101 will clobber the T bit. */
3102 bool
3103 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3105 gcc_assert (CONST_INT_P (shift_amount));
3107 const int shift_amount_i = INTVAL (shift_amount) & 31;
3109 /* Special case for shift count of 31: use and-rotl sequence. */
3110 if (shift_amount_i == 31)
3111 return true;
3113 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3114 & ASHL_CLOBBERS_T) != 0;
3117 bool
3118 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3120 gcc_assert (CONST_INT_P (shift_amount));
3122 const int shift_amount_i = INTVAL (shift_amount) & 31;
3124 /* Special case for shift count of 31: use shll-movt sequence. */
3125 if (shift_amount_i == 31)
3126 return true;
3128 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3129 & LSHR_CLOBBERS_T) != 0;
3132 /* Return true if it is potentially beneficial to use a dynamic shift
3133 instruction (shad / shar) instead of a combination of 1/2/8/16
3134 shift instructions for the specified shift count.
3135 If dynamic shifts are not available, always return false. */
3136 bool
3137 sh_dynamicalize_shift_p (rtx count)
3139 gcc_assert (CONST_INT_P (count));
3141 const int shift_amount_i = INTVAL (count) & 31;
3142 int insn_count;
3144 /* For left and right shifts, there are shorter 2 insn sequences for
3145 shift amounts of 31. */
3146 if (shift_amount_i == 31)
3147 insn_count = 2;
3148 else
3149 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3151 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
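/* For example, assuming an SH_DYNAMIC_SHIFT_COST of 1, a constant shift
   by 21 would need 4 insns ({ 16, 2, 1, 2 }), so on targets with
   dynamic shifts loading the count into a register and using a single
   dynamic shift insn is preferred.  */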
3154 /* Assuming we have a value that has been sign-extended by at least one bit,
3155 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
3156 to shift it by N without data loss, and quicker than by other means? */
3157 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3159 /* Return the cost of a shift. */
3161 static inline int
3162 shiftcosts (rtx x)
3164 int value;
3166 if (TARGET_SHMEDIA)
3167 return 1;
3169 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3171 if (GET_MODE (x) == DImode
3172 && CONST_INT_P (XEXP (x, 1))
3173 && INTVAL (XEXP (x, 1)) == 1)
3174 return 2;
3176 /* Everything else is invalid, because there is no pattern for it. */
3177 return -1;
3179 /* If shifting by a non-constant amount, this will be expensive. */
3180 if (!CONST_INT_P (XEXP (x, 1)))
3181 return SH_DYNAMIC_SHIFT_COST;
3183 /* Otherwise, return the true cost in instructions. Cope with out of range
3184 shift counts more or less arbitrarily. */
3185 value = INTVAL (XEXP (x, 1)) & 31;
3187 if (GET_CODE (x) == ASHIFTRT)
3189 int cost = ashiftrt_insns[value];
3190 /* If dynamic shifts are available and profitable in this case, then we
3191 put the constant in a reg and use shad. */
3192 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3193 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3194 return cost;
3196 else
3197 return ashl_lshr_seq[value].insn_count;
3200 /* Return the cost of an AND/XOR/IOR operation. */
3202 static inline int
3203 and_xor_ior_costs (rtx x, int code)
3205 int i;
3207 /* A logical operation with two registers is a single cycle
3208 instruction. */
3209 if (!CONST_INT_P (XEXP (x, 1)))
3210 return 1;
3212 i = INTVAL (XEXP (x, 1));
3214 if (TARGET_SHMEDIA)
3216 if (satisfies_constraint_I10 (XEXP (x, 1))
3217 || satisfies_constraint_J16 (XEXP (x, 1)))
3218 return 1;
3219 else
3220 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3223 /* These constants are single cycle extu.[bw] instructions. */
3224 if ((i == 0xff || i == 0xffff) && code == AND)
3225 return 1;
3226 /* Constants that can be used in an instruction as an immediate are
3227 a single cycle, but this requires r0, so make it a little more
3228 expensive. */
3229 if (CONST_OK_FOR_K08 (i))
3230 return 2;
3231 /* Constants that can be loaded with a mov immediate need one more cycle.
3232 This case is probably unnecessary. */
3233 if (CONST_OK_FOR_I08 (i))
3234 return 2;
3235 /* Any other constant requires an additional 2 cycle pc-relative load.
3236 This case is probably unnecessary. */
3237 return 3;
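/* For example, on non-SHMEDIA targets 'x & 0xFF' costs 1 (a single
   extu.b), 'x & 0x7F' costs 2 (an and #imm,R0) and 'x & 0x12345'
   costs 3 because the constant has to be loaded from the constant
   pool first.  */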
3240 /* Return the cost of an addition or a subtraction. */
3242 static inline int
3243 addsubcosts (rtx x)
3245 /* Adding a register is a single cycle insn. */
3246 if (REG_P (XEXP (x, 1))
3247 || GET_CODE (XEXP (x, 1)) == SUBREG)
3248 return 1;
3250 /* Likewise for small constants. */
3251 if (CONST_INT_P (XEXP (x, 1))
3252 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3253 return 1;
3255 if (TARGET_SHMEDIA)
3256 switch (GET_CODE (XEXP (x, 1)))
3258 case CONST:
3259 case LABEL_REF:
3260 case SYMBOL_REF:
3261 return TARGET_SHMEDIA64 ? 5 : 3;
3263 case CONST_INT:
3264 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3265 return 2;
3266 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3267 return 3;
3268 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3269 return 4;
3271 /* Fall through. */
3272 default:
3273 return 5;
3276 /* Any other constant requires a 2 cycle pc-relative load plus an
3277 addition. */
3278 return 3;
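/* For example, on non-SHMEDIA targets 'r1 + r2' and 'r1 + 100' both
   cost 1, while 'r1 + 1000' costs 3 because the constant is outside
   the I08 add range and needs a constant pool load first.  */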
3281 /* Return the cost of a multiply. */
3282 static inline int
3283 multcosts (rtx x ATTRIBUTE_UNUSED)
3285 if (sh_multcost >= 0)
3286 return sh_multcost;
3287 if (TARGET_SHMEDIA)
3288 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3289 accept constants. Ideally, we would use a cost of one or two and
3290 add the cost of the operand, but disregard the latter when inside loops
3291 and loop invariant code motion is still to follow.
3292 Using a multiply first and splitting it later if it's a loss
3293 doesn't work because of different sign / zero extension semantics
3294 of multiplies vs. shifts. */
3295 return optimize_size ? 2 : 3;
3297 if (TARGET_SH2)
3299 /* We have a mul insn, so we can never take more than the mul and the
3300 read of the mac reg, but count more because of the latency and extra
3301 reg usage. */
3302 if (optimize_size)
3303 return 2;
3304 return 3;
3307 /* If we're aiming at small code, then just count the number of
3308 insns in a multiply call sequence. */
3309 if (optimize_size)
3310 return 5;
3312 /* Otherwise count all the insns in the routine we'd be calling too. */
3313 return 20;
3316 /* Compute a (partial) cost for rtx X. Return true if the complete
3317 cost has been computed, and false if subexpressions should be
3318 scanned. In either case, *TOTAL contains the cost result. */
3320 static bool
3321 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3322 int *total, bool speed ATTRIBUTE_UNUSED)
3324 switch (code)
3326 /* The lower-subreg pass decides whether to split multi-word regs
3327 into individual regs by looking at the cost for a SET of certain
3328 modes with the following patterns:
3329 (set (reg) (reg))
3330 (set (reg) (const_int 0))
3331 On machines that support vector-move operations a multi-word move
3332 is the same cost as an individual reg move. On SH there is no
3333 vector-move, so we have to provide the correct cost in the number
3334 of move insns to load/store the reg of the mode in question. */
3335 case SET:
3336 if (register_operand (SET_DEST (x), VOIDmode)
3337 && (register_operand (SET_SRC (x), VOIDmode)
3338 || satisfies_constraint_Z (SET_SRC (x))))
3340 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3341 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3342 / mov_insn_size (mode, TARGET_SH2A));
3343 return true;
3345 return false;
3347 /* The cost of a mem access is mainly the cost of the address mode. */
3348 case MEM:
3349 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3350 true);
3351 return true;
3353 /* The cost of a sign or zero extend depends on whether the source is a
3354 reg or a mem. In case of a mem take the address into account. */
3355 case SIGN_EXTEND:
3356 if (REG_P (XEXP (x, 0)))
3358 *total = COSTS_N_INSNS (1);
3359 return true;
3361 if (MEM_P (XEXP (x, 0)))
3363 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3364 GET_MODE (XEXP (x, 0)),
3365 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3366 return true;
3368 return false;
3370 case ZERO_EXTEND:
3371 if (REG_P (XEXP (x, 0)))
3373 *total = COSTS_N_INSNS (1);
3374 return true;
3376 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3377 && (GET_MODE (XEXP (x, 0)) == QImode
3378 || GET_MODE (XEXP (x, 0)) == HImode))
3380 /* Handle SH2A's movu.b and movu.w insn. */
3381 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3382 GET_MODE (XEXP (x, 0)),
3383 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3384 return true;
3386 return false;
3388 /* mems for SFmode and DFmode can be inside a parallel due to
3389 the way the fpscr is handled. */
3390 case PARALLEL:
3391 for (int i = 0; i < XVECLEN (x, 0); i++)
3393 rtx xx = XVECEXP (x, 0, i);
3394 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3396 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3397 GET_MODE (XEXP (xx, 0)),
3398 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3399 return true;
3401 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3403 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3404 GET_MODE (XEXP (xx, 1)),
3405 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3406 return true;
3410 if (sh_1el_vec (x, VOIDmode))
3411 *total = outer_code != SET;
3412 else if (sh_rep_vec (x, VOIDmode))
3413 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3414 + (outer_code != SET));
3415 else
3416 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3417 return true;
3419 case CONST_INT:
3420 if (TARGET_SHMEDIA)
3422 if (INTVAL (x) == 0)
3423 *total = 0;
3424 else if (outer_code == AND && and_operand ((x), DImode))
3425 *total = 0;
3426 else if ((outer_code == IOR || outer_code == XOR
3427 || outer_code == PLUS)
3428 && CONST_OK_FOR_I10 (INTVAL (x)))
3429 *total = 0;
3430 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3431 *total = COSTS_N_INSNS (outer_code != SET);
3432 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3433 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3434 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3435 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3436 else
3437 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3438 return true;
3440 if (CONST_OK_FOR_I08 (INTVAL (x)))
3441 *total = 0;
3442 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3443 && CONST_OK_FOR_K08 (INTVAL (x)))
3444 *total = 1;
3445 /* prepare_cmp_insn will force costly constants into registers before
3446 the cbranch[sd]i4 patterns can see them, so preserve potentially
3447 interesting ones not covered by I08 above. */
3448 else if (outer_code == COMPARE
3449 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3450 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3451 || INTVAL (x) == 0x7fffffff
3452 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3453 *total = 1;
3454 else
3455 *total = 8;
3456 return true;
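/* To illustrate the CONST_INT costs above (non-SHMEDIA): a constant
   such as 5 satisfies I08 and is free, 0x80 inside a COMPARE is kept
   cheap (cost 1) for the cbranch patterns, and something like 0x12345
   gets the expensive cost of 8.  */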
3458 case EQ:
3459 /* An and with a constant compared against zero is
3460 most likely going to be a TST #imm, R0 instruction.
3461 Notice that this does not catch the zero_extract variants from
3462 the md file. */
3463 if (GET_CODE (XEXP (x, 0)) == AND
3464 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3466 *total = 1;
3467 return true;
3469 else
3470 return false;
3472 case CONST:
3473 case LABEL_REF:
3474 case SYMBOL_REF:
3475 if (TARGET_SHMEDIA64)
3476 *total = COSTS_N_INSNS (4);
3477 else if (TARGET_SHMEDIA32)
3478 *total = COSTS_N_INSNS (2);
3479 else
3480 *total = 5;
3481 return true;
3483 case CONST_DOUBLE:
3484 if (TARGET_SHMEDIA)
3485 *total = COSTS_N_INSNS (4);
3486 /* prepare_cmp_insn will force costly constants into registers before
3487 the cbranchdi4 pattern can see them, so preserve potentially
3488 interesting ones. */
3489 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3490 *total = 1;
3491 else
3492 *total = 10;
3493 return true;
3495 case CONST_VECTOR:
3496 /* FIXME: This looks broken. Only the last statement has any effect.
3497 Probably this could be folded with the PARALLEL case? */
3498 if (x == CONST0_RTX (GET_MODE (x)))
3499 *total = 0;
3500 else if (sh_1el_vec (x, VOIDmode))
3501 *total = outer_code != SET;
3502 if (sh_rep_vec (x, VOIDmode))
3503 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3504 + (outer_code != SET));
3505 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3506 return true;
3508 case PLUS:
3509 case MINUS:
3510 *total = COSTS_N_INSNS (addsubcosts (x));
3511 return true;
3513 case AND:
3514 case XOR:
3515 case IOR:
3516 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3517 return true;
3519 case MULT:
3520 *total = COSTS_N_INSNS (multcosts (x));
3521 return true;
3523 case LT:
3524 case GE:
3525 /* div0s sign comparison. */
3526 if (GET_CODE (XEXP (x, 0)) == XOR
3527 && REG_P ((XEXP (XEXP (x, 0), 0)))
3528 && REG_P ((XEXP (XEXP (x, 0), 1)))
3529 && satisfies_constraint_Z (XEXP (x, 1)))
3531 *total = COSTS_N_INSNS (1);
3532 return true;
3534 else
3535 return false;
3537 case LSHIFTRT:
3538 /* div0s sign comparison. */
3539 if (GET_CODE (XEXP (x, 0)) == XOR
3540 && REG_P ((XEXP (XEXP (x, 0), 0)))
3541 && REG_P ((XEXP (XEXP (x, 0), 1)))
3542 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3544 *total = COSTS_N_INSNS (1);
3545 return true;
3547 /* Fall through to shiftcosts. */
3548 case ASHIFT:
3549 case ASHIFTRT:
3551 int cost = shiftcosts (x);
3552 if (cost < 0)
3553 return false;
3554 *total = COSTS_N_INSNS (cost);
3555 return true;
3558 case DIV:
3559 case UDIV:
3560 case MOD:
3561 case UMOD:
3562 *total = COSTS_N_INSNS (20);
3563 return true;
3565 case FLOAT:
3566 case FIX:
3567 *total = 100;
3568 return true;
3570 default:
3571 return false;
3575 /* Determine the size of the fundamental move insn that will be used
3576 for the specified mode. */
3578 static inline int
3579 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3581 const int mode_sz = GET_MODE_SIZE (mode);
3583 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3584 || (TARGET_FMOVD && mode == DFmode))
3585 return mode_sz;
3586 else
3588 /* The max. available mode for actual move insns is SImode.
3589 Larger accesses will be split into multiple loads/stores. */
3590 const int max_mov_sz = GET_MODE_SIZE (SImode);
3591 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3595 /* Determine the maximum possible displacement for a move insn for the
3596 specified mode. */
3598 static int
3599 max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
3601 /* The 4 byte displacement move insns are the same as the 2 byte
3602 versions but take a 12 bit displacement. All we need to do is to
3603 scale the max. displacement value accordingly. */
3604 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3606 /* SH2A supports FPU move insns with 12 bit displacements.
3607 Other variants do not support any kind of displacements for
3608 FPU move insns. */
3609 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3610 return 0;
3611 else
3613 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3614 const int mode_sz = GET_MODE_SIZE (mode);
3615 int r = 15 * mov_insn_sz * disp_scale;
3617 /* If the mov insn will be split into multiple loads/stores, the
3618 maximum possible displacement is a bit smaller. */
3619 if (mode_sz > mov_insn_sz)
3620 r -= mode_sz - mov_insn_sz;
3621 return r;
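/* For example, for SImode this evaluates to 15 * 4 = 60 bytes with the
   ordinary 4 bit displacement field, and to 15 * 4 * (4095 / 15) = 16380
   bytes when the SH2A 12 bit displacement forms are considered.  */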
3625 /* Determine the alignment mask for a move insn of the
3626 specified mode. */
3628 static inline int
3629 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3631 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3632 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3635 /* Return the displacement value of a displacement address. */
3637 static inline HOST_WIDE_INT
3638 disp_addr_displacement (rtx x)
3640 gcc_assert (satisfies_constraint_Sdd (x));
3641 return INTVAL (XEXP (XEXP (x, 0), 1));
3644 /* Compute the cost of an address. */
3646 static int
3647 sh_address_cost (rtx x, enum machine_mode mode,
3648 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3650 /* 'GBR + 0'. Account one more because of R0 restriction. */
3651 if (REG_P (x) && REGNO (x) == GBR_REG)
3652 return 2;
3654 /* Simple reg, post-inc, pre-dec addressing. */
3655 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3656 return 1;
3658 /* 'reg + disp' addressing. */
3659 if (GET_CODE (x) == PLUS
3660 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3662 /* 'GBR + disp'. Account one more because of R0 restriction. */
3663 if (REGNO (XEXP (x, 0)) == GBR_REG
3664 && gbr_displacement (XEXP (x, 1), mode))
3665 return 2;
3667 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3669 if (offset == 0)
3670 return 1;
3672 /* The displacement would fit into a 2 byte move insn.
3673 HImode and QImode loads/stores with displacement put pressure on
3674 R0 which will most likely require another reg copy. Thus account
3675 a higher cost for that. */
3676 if (offset > 0 && offset <= max_mov_insn_displacement (mode, false))
3677 return (mode == HImode || mode == QImode) ? 2 : 1;
3679 /* The displacement would fit into a 4 byte move insn (SH2A). */
3680 if (TARGET_SH2A
3681 && offset > 0 && offset <= max_mov_insn_displacement (mode, true))
3682 return 2;
3684 /* The displacement is probably out of range and will require extra
3685 calculations. */
3686 return 3;
3689 /* 'reg + reg' addressing. Account a slightly higher cost because of
3690 increased pressure on R0. */
3691 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3692 && ! TARGET_SHMEDIA)
3693 return 3;
3695 /* Not sure what it is - probably expensive. */
3696 return 10;
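/* A few illustrative cost values as computed above (a sketch, not an
   exhaustive enumeration):
     @(rn)                                        -> 1
     @rn+ / @-rn                                  -> 1
     @(gbr) and @(disp,gbr) in range              -> 2 (R0 restriction)
     @(disp,rn), SImode, disp in range            -> 1
     @(disp,rn), QImode/HImode, disp in range     -> 2 (puts pressure on R0)
     @(disp,rn), reachable only via SH2A 4 byte move insn -> 2
     @(disp,rn), disp out of range                -> 3
     @(r0,rn)                                     -> 3
     anything else                                -> 10.  */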
3699 /* Code to expand a shift. */
3701 static void
3702 gen_ashift (int type, int n, rtx reg)
3704 rtx n_rtx;
3706 /* Negative values here come from the shift sequence tables
(e.g. ext_ashl_lshr_seq). */
3707 if (n < 0)
3709 if (type == ASHIFT)
3710 type = LSHIFTRT;
3711 else
3712 type = ASHIFT;
3713 n = -n;
3716 n_rtx = GEN_INT (n);
3717 gcc_assert (satisfies_constraint_P27 (n_rtx));
3719 switch (type)
3721 case ASHIFTRT:
3722 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3723 break;
3724 case LSHIFTRT:
3725 if (n == 1)
3726 emit_insn (gen_shlr (reg, reg));
3727 else
3728 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3729 break;
3730 case ASHIFT:
3731 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3732 break;
3733 default:
3734 gcc_unreachable ();
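/* Example use (illustrative): gen_ashift (ASHIFT, 8, reg) emits a single
   8 bit left shift (roughly a shll8), and gen_ashift (LSHIFTRT, 1, reg)
   a single shlr.  A negative count such as -2, as found in the extended
   shift sequences, flips the direction, so gen_ashift (ASHIFT, -2, reg)
   would emit a 2 bit logical right shift instead.  */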
3738 /* Same for HImode */
3739 static void
3740 gen_ashift_hi (int type, int n, rtx reg)
3742 /* Negative values here come from the shift sequence tables
(e.g. ext_ashl_lshr_seq). */
3743 if (n < 0)
3745 if (type == ASHIFT)
3746 type = LSHIFTRT;
3747 else
3748 type = ASHIFT;
3749 n = -n;
3752 switch (type)
3754 case ASHIFTRT:
3755 case LSHIFTRT:
3756 /* We don't have HImode right shift operations because using the
3757 ordinary 32 bit shift instructions for that doesn't generate proper
3758 zero/sign extension.
3759 gen_ashift_hi is only called in contexts where we know that the
3760 sign extension works out correctly. */
3762 int offset = 0;
3763 if (GET_CODE (reg) == SUBREG)
3765 offset = SUBREG_BYTE (reg);
3766 reg = SUBREG_REG (reg);
3768 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3769 break;
3771 case ASHIFT:
3772 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3773 break;
3777 /* Output RTL to split a constant shift into its component SH constant
3778 shift instructions. */
3780 void
3781 gen_shifty_op (int code, rtx *operands)
3783 int value = INTVAL (operands[2]);
3784 int max, i;
3786 /* Truncate the shift count in case it is out of bounds. */
3787 value = value & 31;
3789 if (value == 31)
3791 if (code == LSHIFTRT)
3793 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3794 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3795 return;
3797 else if (code == ASHIFT)
3799 /* There is a two instruction sequence for 31 bit left shifts,
3800 but it requires r0. */
3801 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3803 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3804 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3805 return;
3809 else if (value == 0)
3811 /* This can happen even when optimizing, if there were subregs before
3812 reload. Don't output a nop here, as this is never optimized away;
3813 use a no-op move instead. */
3814 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3815 return;
3818 max = ashl_lshr_seq[value].insn_count;
3819 for (i = 0; i < max; i++)
3820 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
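/* Illustration (a sketch): since the SH constant shift insns only handle
   counts of 1, 2, 8 and 16 (constraint P27), a constant shift by 6 is
   expected to be emitted as three 2 bit shifts, and a shift by 10 as an
   8 bit shift followed by a 2 bit shift, following the precomputed
   ashl_lshr_seq table.  */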
3823 /* Same as above, but optimized for values where the topmost bits don't
3824 matter. */
3826 void
3827 gen_shifty_hi_op (int code, rtx *operands)
3829 int value = INTVAL (operands[2]);
3830 int max, i;
3831 void (*gen_fun) (int, int, rtx);
3833 /* This operation is used by and_shl for SImode values with a few
3834 high bits known to be cleared. */
3835 value &= 31;
3836 if (value == 0)
3838 emit_insn (gen_nop ());
3839 return;
3842 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3843 if (code == ASHIFT)
3845 max = ext_ashl_lshr_seq[value].insn_count;
3846 for (i = 0; i < max; i++)
3847 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3849 else
3850 /* When shifting right, emit the shifts in reverse order, so that
3851 solitary negative values come first. */
3852 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3853 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3856 /* Output RTL for an arithmetic right shift. */
3858 /* ??? Rewrite to use super-optimizer sequences. */
3860 bool
3861 expand_ashiftrt (rtx *operands)
3863 rtx wrk;
3864 char func[18];
3865 int value;
3867 if (TARGET_DYNSHIFT)
3869 if (!CONST_INT_P (operands[2]))
3871 rtx count = copy_to_mode_reg (SImode, operands[2]);
3872 emit_insn (gen_negsi2 (count, count));
3873 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3874 return true;
3876 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3877 > 1 + SH_DYNAMIC_SHIFT_COST)
3879 rtx count
3880 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3881 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3882 return true;
3885 if (!CONST_INT_P (operands[2]))
3886 return false;
3888 value = INTVAL (operands[2]) & 31;
3890 if (value == 31)
3892 /* If we are called from abs expansion, arrange things so that
3893 we can use a single MT instruction that doesn't clobber the source,
3894 if LICM can hoist out the load of the constant zero. */
3895 if (currently_expanding_to_rtl)
3897 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3898 operands[1]));
3899 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3900 return true;
3902 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3903 return true;
3905 else if (value >= 16 && value <= 19)
3907 wrk = gen_reg_rtx (SImode);
3908 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3909 value -= 16;
3910 while (value--)
3911 gen_ashift (ASHIFTRT, 1, wrk);
3912 emit_move_insn (operands[0], wrk);
3913 return true;
3915 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3916 else if (value <= 5)
3918 wrk = gen_reg_rtx (SImode);
3919 emit_move_insn (wrk, operands[1]);
3920 while (value--)
3921 gen_ashift (ASHIFTRT, 1, wrk);
3922 emit_move_insn (operands[0], wrk);
3923 return true;
3926 wrk = gen_reg_rtx (Pmode);
3928 /* Load the value into an arg reg and call a helper. */
3929 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3930 sprintf (func, "__ashiftrt_r4_%d", value);
3931 function_symbol (wrk, func, SFUNC_STATIC);
3932 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3933 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3934 return true;
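/* Illustration (a sketch) of the strategy selection above:
     count 31              -> a compare against zero plus a negated T bit
                              move (or ashrsi2_31 when not expanding to rtl).
     count 16..19          -> one ashrsi2_16 plus up to three single shifts.
     count <= 5            -> up to five single shifts inline.
     anything else, without dynamic shifts -> load the operand into r4 and
                              call the helper __ashiftrt_r4_<count>.
   With TARGET_DYNSHIFT the count is negated and used with a dynamic
   shift (ashrsi3_d) when that is cheaper.  */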
3937 /* Try to find a good way to implement the combiner pattern
3938 [(set (match_operand:SI 0 "register_operand" "r")
3939 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3940 (match_operand:SI 2 "const_int_operand" "n"))
3941 (match_operand:SI 3 "const_int_operand" "n"))) .
3942 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3943 return 0 for simple right / left or left/right shift combination.
3944 return 1 for a combination of shifts with zero_extend.
3945 return 2 for a combination of shifts with an AND that needs r0.
3946 return 3 for a combination of shifts with an AND that needs an extra
3947 scratch register, when the three highmost bits of the AND mask are clear.
3948 return 4 for a combination of shifts with an AND that needs an extra
3949 scratch register, when any of the three highmost bits of the AND mask
3950 is set.
3951 If ATTRP is set, store an initial right shift width in ATTRP[0],
3952 and the instruction length in ATTRP[1] . These values are not valid
3953 when returning 0.
3954 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3955 shift_amounts for the last shift value that is to be used before the
3956 sign extend. */
3958 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3960 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3961 int left = INTVAL (left_rtx), right;
3962 int best = 0;
3963 int cost, best_cost = 10000;
3964 int best_right = 0, best_len = 0;
3965 int i;
3966 int can_ext;
3968 if (left < 0 || left > 31)
3969 return 0;
3970 if (CONST_INT_P (mask_rtx))
3971 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3972 else
3973 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3974 /* Can this be expressed as a right shift / left shift pair? */
3975 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3976 right = exact_log2 (lsb);
3977 mask2 = ~(mask + lsb - 1);
3978 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3979 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3980 if (! mask2)
3981 best_cost = ashl_lshr_seq[right].insn_count
3982 + ashl_lshr_seq[right + left].insn_count;
3983 /* mask has no trailing zeroes <==> ! right */
3984 else if (! right && mask2 == ~(lsb2 - 1))
3986 int late_right = exact_log2 (lsb2);
3987 best_cost = ashl_lshr_seq[left + late_right].insn_count
3988 + ashl_lshr_seq[late_right].insn_count;
3990 /* Try to use zero extend. */
3991 if (mask2 == ~(lsb2 - 1))
3993 int width, first;
3995 for (width = 8; width <= 16; width += 8)
3997 /* Can we zero-extend right away? */
3998 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4000 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4001 + ext_ashl_lshr_seq[left + right].insn_count;
4002 if (cost < best_cost)
4004 best = 1;
4005 best_cost = cost;
4006 best_right = right;
4007 best_len = cost;
4008 if (attrp)
4009 attrp[2] = -1;
4011 continue;
4013 /* ??? Could try to put zero extend into initial right shift,
4014 or even shift a bit left before the right shift. */
4015 /* Determine value of first part of left shift, to get to the
4016 zero extend cut-off point. */
4017 first = width - exact_log2 (lsb2) + right;
4018 if (first >= 0 && right + left - first >= 0)
4020 cost = ext_ashl_lshr_seq[right].insn_count
4021 + ext_ashl_lshr_seq[first].insn_count + 1
4022 + ext_ashl_lshr_seq[right + left - first].insn_count;
4024 if (cost < best_cost)
4026 best = 1;
4027 best_cost = cost;
4028 best_right = right;
4029 best_len = cost;
4030 if (attrp)
4031 attrp[2] = first;
4036 /* Try to use r0 AND pattern */
4037 for (i = 0; i <= 2; i++)
4039 if (i > right)
4040 break;
4041 if (! CONST_OK_FOR_K08 (mask >> i))
4042 continue;
4043 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4044 if (cost < best_cost)
4046 best = 2;
4047 best_cost = cost;
4048 best_right = i;
4049 best_len = cost - 1;
4052 /* Try to use a scratch register to hold the AND operand. */
4053 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4054 for (i = 0; i <= 2; i++)
4056 if (i > right)
4057 break;
4058 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4059 + (can_ext
4060 ? ext_ashl_lshr_seq
4061 : ashl_lshr_seq)[left + i].insn_count;
4062 if (cost < best_cost)
4064 best = 4 - can_ext;
4065 best_cost = cost;
4066 best_right = i;
4067 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4071 if (attrp)
4073 attrp[0] = best_right;
4074 attrp[1] = best_len;
4076 return best;
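/* Worked example (a sketch): for ((x << 2) & 0x3C) we get left = 2 and
   mask = 0x3C >> 2 = 0xF.  The plain right/left shift pair costs more
   than AND-ing with the 8 bit constant 15 (which needs r0) followed by a
   2 bit left shift, so shl_and_kind is expected to return 2 here.  */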
4079 /* This is used in length attributes of the unnamed instructions
4080 corresponding to shl_and_kind return values of 1 and 2. */
4082 shl_and_length (rtx insn)
4084 rtx set_src, left_rtx, mask_rtx;
4085 int attributes[3];
4087 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4088 left_rtx = XEXP (XEXP (set_src, 0), 1);
4089 mask_rtx = XEXP (set_src, 1);
4090 shl_and_kind (left_rtx, mask_rtx, attributes);
4091 return attributes[1];
4094 /* This is used in the length attribute of the and_shl_scratch instruction. */
4097 shl_and_scr_length (rtx insn)
4099 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4100 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4101 rtx op = XEXP (set_src, 0);
4102 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4103 op = XEXP (XEXP (op, 0), 0);
4104 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4107 /* Generate rtl for instructions for which shl_and_kind advised a particular
4108 method of generating them, i.e. returned nonzero. */
4110 bool
4111 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4113 int attributes[3];
4114 unsigned HOST_WIDE_INT mask;
4115 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4116 int right, total_shift;
4117 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4119 right = attributes[0];
4120 total_shift = INTVAL (left_rtx) + right;
4121 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4122 switch (kind)
4124 default:
4125 return true;
4126 case 1:
4128 int first = attributes[2];
4129 rtx operands[3];
4131 if (first < 0)
4133 emit_insn ((mask << right) <= 0xff
4134 ? gen_zero_extendqisi2 (dest,
4135 gen_lowpart (QImode, source))
4136 : gen_zero_extendhisi2 (dest,
4137 gen_lowpart (HImode, source)));
4138 source = dest;
4140 if (source != dest)
4141 emit_insn (gen_movsi (dest, source));
4142 operands[0] = dest;
4143 if (right)
4145 operands[2] = GEN_INT (right);
4146 gen_shifty_hi_op (LSHIFTRT, operands);
4148 if (first > 0)
4150 operands[2] = GEN_INT (first);
4151 gen_shifty_hi_op (ASHIFT, operands);
4152 total_shift -= first;
4153 mask <<= first;
4155 if (first >= 0)
4156 emit_insn (mask <= 0xff
4157 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4158 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4159 if (total_shift > 0)
4161 operands[2] = GEN_INT (total_shift);
4162 gen_shifty_hi_op (ASHIFT, operands);
4164 break;
4166 case 4:
4167 shift_gen_fun = gen_shifty_op;
4168 case 3:
4169 /* If the topmost bit that matters is set, set the topmost bits
4170 that don't matter. This way, we might be able to get a shorter
4171 signed constant. */
4172 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4173 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4174 case 2:
4175 /* Don't expand fine-grained when combining, because that will
4176 make the pattern fail. */
4177 if (currently_expanding_to_rtl
4178 || reload_in_progress || reload_completed)
4180 rtx operands[3];
4182 /* Cases 3 and 4 should be handled by this split
4183 only while combining */
4184 gcc_assert (kind <= 2);
4185 if (right)
4187 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4188 source = dest;
4190 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4191 if (total_shift)
4193 operands[0] = dest;
4194 operands[1] = dest;
4195 operands[2] = GEN_INT (total_shift);
4196 shift_gen_fun (ASHIFT, operands);
4198 break;
4200 else
4202 int neg = 0;
4203 if (kind != 4 && total_shift < 16)
4205 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4206 if (neg > 0)
4207 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4208 else
4209 neg = 0;
4211 emit_insn (gen_and_shl_scratch (dest, source,
4212 GEN_INT (right),
4213 GEN_INT (mask),
4214 GEN_INT (total_shift + neg),
4215 GEN_INT (neg)));
4216 emit_insn (gen_movsi (dest, dest));
4217 break;
4220 return false;
4223 /* Try to find a good way to implement the combiner pattern
4224 [(set (match_operand:SI 0 "register_operand" "=r")
4225 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4226 (match_operand:SI 2 "const_int_operand" "n")
4227 (match_operand:SI 3 "const_int_operand" "n")
4228 (const_int 0)))
4229 (clobber (reg:SI T_REG))]
4230 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4231 return 0 for simple left / right shift combination.
4232 return 1 for left shift / 8 bit sign extend / left shift.
4233 return 2 for left shift / 16 bit sign extend / left shift.
4234 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4235 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4236 return 5 for left shift / 16 bit sign extend / right shift
4237 return 6 for < 8 bit sign extend / left shift.
4238 return 7 for < 8 bit sign extend / left shift / single right shift.
4239 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4242 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4244 int left, size, insize, ext;
4245 int cost = 0, best_cost;
4246 int kind;
4248 left = INTVAL (left_rtx);
4249 size = INTVAL (size_rtx);
4250 insize = size - left;
4251 gcc_assert (insize > 0);
4252 /* Default to left / right shift. */
4253 kind = 0;
4254 best_cost = ashl_lshr_seq[32 - insize].insn_count
4255 + ashl_lshr_seq[32 - size].insn_count;
4256 if (size <= 16)
4258 /* 16 bit shift / sign extend / 16 bit shift */
4259 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4260 + ashl_lshr_seq[16 - size].insn_count;
4261 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4262 below, by alternative 3 or something even better. */
4263 if (cost < best_cost)
4265 kind = 5;
4266 best_cost = cost;
4269 /* Try a plain sign extend between two shifts. */
4270 for (ext = 16; ext >= insize; ext -= 8)
4272 if (ext <= size)
4274 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4275 + ashl_lshr_seq[size - ext].insn_count;
4276 if (cost < best_cost)
4278 kind = ext / (unsigned) 8;
4279 best_cost = cost;
4282 /* Check if we can do a sloppy shift with a final signed shift
4283 restoring the sign. */
4284 if (EXT_SHIFT_SIGNED (size - ext))
4285 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4286 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4287 /* If not, maybe it's still cheaper to do the second shift sloppy,
4288 and do a final sign extend? */
4289 else if (size <= 16)
4290 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4291 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4292 + 1;
4293 else
4294 continue;
4295 if (cost < best_cost)
4297 kind = ext / (unsigned) 8 + 2;
4298 best_cost = cost;
4301 /* Check if we can sign extend in r0 */
4302 if (insize < 8)
4304 cost = 3 + ashl_lshr_seq[left].insn_count;
4305 if (cost < best_cost)
4307 kind = 6;
4308 best_cost = cost;
4310 /* Try the same with a final signed shift. */
4311 if (left < 31)
4313 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4314 if (cost < best_cost)
4316 kind = 7;
4317 best_cost = cost;
4321 if (TARGET_DYNSHIFT)
4323 /* Try to use a dynamic shift. */
4324 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4325 if (cost < best_cost)
4327 kind = 0;
4328 best_cost = cost;
4331 if (costp)
4332 *costp = cost;
4333 return kind;
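/* Worked example (a sketch): for left = 8 and size = 16 (an 8 bit field
   sign extended and shifted left by 8) the default left/right shift pair
   needs three insns, whereas kind 5 - a left shift by 16 - insize = 8
   followed by a 16 bit sign extend - needs only two (roughly shll8 and
   exts.w), so kind 5 is expected to be chosen.  */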
4336 /* Function to be used in the length attribute of the instructions
4337 implementing this pattern. */
4340 shl_sext_length (rtx insn)
4342 rtx set_src, left_rtx, size_rtx;
4343 int cost;
4345 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4346 left_rtx = XEXP (XEXP (set_src, 0), 1);
4347 size_rtx = XEXP (set_src, 1);
4348 shl_sext_kind (left_rtx, size_rtx, &cost);
4349 return cost;
4352 /* Generate rtl for this pattern */
4354 bool
4355 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4357 int kind;
4358 int left, size, insize, cost;
4359 rtx operands[3];
4361 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4362 left = INTVAL (left_rtx);
4363 size = INTVAL (size_rtx);
4364 insize = size - left;
4365 switch (kind)
4367 case 1:
4368 case 2:
4369 case 3:
4370 case 4:
4372 int ext = kind & 1 ? 8 : 16;
4373 int shift2 = size - ext;
4375 /* Don't expand fine-grained when combining, because that will
4376 make the pattern fail. */
4377 if (! currently_expanding_to_rtl
4378 && ! reload_in_progress && ! reload_completed)
4380 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4381 emit_insn (gen_movsi (dest, source));
4382 break;
4384 if (dest != source)
4385 emit_insn (gen_movsi (dest, source));
4386 operands[0] = dest;
4387 if (ext - insize)
4389 operands[2] = GEN_INT (ext - insize);
4390 gen_shifty_hi_op (ASHIFT, operands);
4392 emit_insn (kind & 1
4393 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4394 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4395 if (kind <= 2)
4397 if (shift2)
4399 operands[2] = GEN_INT (shift2);
4400 gen_shifty_op (ASHIFT, operands);
4403 else
4405 if (shift2 > 0)
4407 if (EXT_SHIFT_SIGNED (shift2))
4409 operands[2] = GEN_INT (shift2 + 1);
4410 gen_shifty_op (ASHIFT, operands);
4411 operands[2] = const1_rtx;
4412 gen_shifty_op (ASHIFTRT, operands);
4413 break;
4415 operands[2] = GEN_INT (shift2);
4416 gen_shifty_hi_op (ASHIFT, operands);
4418 else if (shift2)
4420 operands[2] = GEN_INT (-shift2);
4421 gen_shifty_hi_op (LSHIFTRT, operands);
4423 emit_insn (size <= 8
4424 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4425 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4427 break;
4429 case 5:
4431 int i = 16 - size;
4432 if (! currently_expanding_to_rtl
4433 && ! reload_in_progress && ! reload_completed)
4434 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4435 else
4437 operands[0] = dest;
4438 operands[2] = GEN_INT (16 - insize);
4439 gen_shifty_hi_op (ASHIFT, operands);
4440 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4442 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4443 while (--i >= 0)
4444 gen_ashift (ASHIFTRT, 1, dest);
4445 break;
4447 case 6:
4448 case 7:
4449 /* Don't expand fine-grained when combining, because that will
4450 make the pattern fail. */
4451 if (! currently_expanding_to_rtl
4452 && ! reload_in_progress && ! reload_completed)
4454 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4455 emit_insn (gen_movsi (dest, source));
4456 break;
4458 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4459 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4460 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4461 operands[0] = dest;
4462 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4463 gen_shifty_op (ASHIFT, operands);
4464 if (kind == 7)
4465 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4466 break;
4467 default:
4468 return true;
4470 return false;
4473 /* Prefix a symbol_ref name with "datalabel". */
4476 gen_datalabel_ref (rtx sym)
4478 const char *str;
4480 if (GET_CODE (sym) == LABEL_REF)
4481 return gen_rtx_CONST (GET_MODE (sym),
4482 gen_rtx_UNSPEC (GET_MODE (sym),
4483 gen_rtvec (1, sym),
4484 UNSPEC_DATALABEL));
4486 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4488 str = XSTR (sym, 0);
4489 /* Share all SYMBOL_REF strings with the same value - that is important
4490 for cse. */
4491 str = IDENTIFIER_POINTER (get_identifier (str));
4492 XSTR (sym, 0) = str;
4494 return sym;
4498 static alloc_pool label_ref_list_pool;
4500 typedef struct label_ref_list_d
4502 rtx label;
4503 struct label_ref_list_d *next;
4504 } *label_ref_list_t;
4506 /* The SH cannot load a large constant into a register, constants have to
4507 come from a pc relative load. The reference of a pc relative load
4508 instruction must be less than 1k in front of the instruction. This
4509 means that we often have to dump a constant inside a function, and
4510 generate code to branch around it.
4512 It is important to minimize this, since the branches will slow things
4513 down and make things bigger.
4515 Worst case code looks like:
4517 mov.l L1,rn
4518 bra L2
4520 align
4521 L1: .long value
4525 mov.l L3,rn
4526 bra L4
4528 align
4529 L3: .long value
4533 We fix this by performing a scan before scheduling, which notices which
4534 instructions need to have their operands fetched from the constant table
4535 and builds the table.
4537 The algorithm is:
4539 scan, find an instruction which needs a pcrel move. Look forward, find the
4540 last barrier which is within MAX_COUNT bytes of the requirement.
4541 If there isn't one, make one. Process all the instructions between
4542 the find and the barrier.
4544 In the above example, we can tell that L3 is within 1k of L1, so
4545 the first move can be shrunk from the 3 insn+constant sequence into
4546 just 1 insn, and the constant moved to L3 to make:
4548 mov.l L1,rn
4550 mov.l L3,rn
4551 bra L4
4553 align
4554 L3:.long value
4555 L4:.long value
4557 Then the second move becomes the target for the shortening process. */
4559 typedef struct
4561 rtx value; /* Value in table. */
4562 rtx label; /* Label of value. */
4563 label_ref_list_t wend; /* End of window. */
4564 enum machine_mode mode; /* Mode of value. */
4566 /* True if this constant is accessed as part of a post-increment
4567 sequence. Note that HImode constants are never accessed in this way. */
4568 bool part_of_sequence_p;
4569 } pool_node;
4571 /* The maximum number of constants that can fit into one pool, since
4572 constants in the range 0..510 are at least 2 bytes long, and in the
4573 range from there to 1018 at least 4 bytes. */
4575 #define MAX_POOL_SIZE 372
4576 static pool_node pool_vector[MAX_POOL_SIZE];
4577 static int pool_size;
4578 static rtx pool_window_label;
4579 static int pool_window_last;
4581 static int max_labelno_before_reorg;
4583 /* ??? If we need a constant in HImode which is the truncated value of a
4584 constant we need in SImode, we could combine the two entries thus saving
4585 two bytes. Is this common enough to be worth the effort of implementing
4586 it? */
4588 /* ??? This stuff should be done at the same time that we shorten branches.
4589 As it is now, we must assume that all branches are the maximum size, and
4590 this causes us to almost always output constant pools sooner than
4591 necessary. */
4593 /* Add a constant to the pool and return its label. */
4595 static rtx
4596 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4598 int i;
4599 rtx lab, new_rtx;
4600 label_ref_list_t ref, newref;
4602 /* First see if we've already got it. */
4603 for (i = 0; i < pool_size; i++)
4605 if (x->code == pool_vector[i].value->code
4606 && mode == pool_vector[i].mode)
4608 if (x->code == CODE_LABEL)
4610 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4611 continue;
4613 if (rtx_equal_p (x, pool_vector[i].value))
4615 lab = new_rtx = 0;
4616 if (! last_value
4617 || ! i
4618 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4620 new_rtx = gen_label_rtx ();
4621 LABEL_REFS (new_rtx) = pool_vector[i].label;
4622 pool_vector[i].label = lab = new_rtx;
4624 if (lab && pool_window_label)
4626 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4627 newref->label = pool_window_label;
4628 ref = pool_vector[pool_window_last].wend;
4629 newref->next = ref;
4630 pool_vector[pool_window_last].wend = newref;
4632 if (new_rtx)
4633 pool_window_label = new_rtx;
4634 pool_window_last = i;
4635 return lab;
4640 /* Need a new one. */
4641 pool_vector[pool_size].value = x;
4642 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4644 lab = 0;
4645 pool_vector[pool_size - 1].part_of_sequence_p = true;
4647 else
4648 lab = gen_label_rtx ();
4649 pool_vector[pool_size].mode = mode;
4650 pool_vector[pool_size].label = lab;
4651 pool_vector[pool_size].wend = NULL;
4652 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4653 if (lab && pool_window_label)
4655 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4656 newref->label = pool_window_label;
4657 ref = pool_vector[pool_window_last].wend;
4658 newref->next = ref;
4659 pool_vector[pool_window_last].wend = newref;
4661 if (lab)
4662 pool_window_label = lab;
4663 pool_window_last = pool_size;
4664 pool_size++;
4665 return lab;
4668 /* Output the literal table. START, if nonzero, is the first instruction
4669 this table is needed for, and also indicates that there is at least one
4670 casesi_worker_2 instruction; we have to emit the operand3 labels from
4671 these insns at a 4-byte aligned position. BARRIER is the barrier
4672 after which we are to place the table. */
4674 static void
4675 dump_table (rtx start, rtx barrier)
4677 rtx scan = barrier;
4678 int i;
4679 bool need_align = true;
4680 rtx lab;
4681 label_ref_list_t ref;
4682 bool have_df = false;
4684 /* Do two passes, first time dump out the HI sized constants. */
4686 for (i = 0; i < pool_size; i++)
4688 pool_node *p = &pool_vector[i];
4690 if (p->mode == HImode)
4692 if (need_align)
4694 scan = emit_insn_after (gen_align_2 (), scan);
4695 need_align = false;
4697 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4698 scan = emit_label_after (lab, scan);
4699 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4700 scan);
4701 for (ref = p->wend; ref; ref = ref->next)
4703 lab = ref->label;
4704 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4707 else if (p->mode == DFmode)
4708 have_df = true;
4711 need_align = true;
4713 if (start)
4715 scan = emit_insn_after (gen_align_4 (), scan);
4716 need_align = false;
4717 for (; start != barrier; start = NEXT_INSN (start))
4718 if (NONJUMP_INSN_P (start)
4719 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4721 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4722 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4724 scan = emit_label_after (lab, scan);
4727 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4729 rtx align_insn = NULL_RTX;
4731 scan = emit_label_after (gen_label_rtx (), scan);
4732 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4733 need_align = false;
4735 for (i = 0; i < pool_size; i++)
4737 pool_node *p = &pool_vector[i];
4739 switch (p->mode)
4741 case HImode:
4742 break;
4743 case SImode:
4744 case SFmode:
4745 if (align_insn && !p->part_of_sequence_p)
4747 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4748 emit_label_before (lab, align_insn);
4749 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4750 align_insn);
4751 for (ref = p->wend; ref; ref = ref->next)
4753 lab = ref->label;
4754 emit_insn_before (gen_consttable_window_end (lab),
4755 align_insn);
4757 delete_insn (align_insn);
4758 align_insn = NULL_RTX;
4759 continue;
4761 else
4763 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4764 scan = emit_label_after (lab, scan);
4765 scan = emit_insn_after (gen_consttable_4 (p->value,
4766 const0_rtx), scan);
4767 need_align = ! need_align;
4769 break;
4770 case DFmode:
4771 if (need_align)
4773 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4774 align_insn = scan;
4775 need_align = false;
4777 case DImode:
4778 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4779 scan = emit_label_after (lab, scan);
4780 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4781 scan);
4782 break;
4783 default:
4784 gcc_unreachable ();
4787 if (p->mode != HImode)
4789 for (ref = p->wend; ref; ref = ref->next)
4791 lab = ref->label;
4792 scan = emit_insn_after (gen_consttable_window_end (lab),
4793 scan);
4798 pool_size = 0;
4801 for (i = 0; i < pool_size; i++)
4803 pool_node *p = &pool_vector[i];
4805 switch (p->mode)
4807 case HImode:
4808 break;
4809 case SImode:
4810 case SFmode:
4811 if (need_align)
4813 need_align = false;
4814 scan = emit_label_after (gen_label_rtx (), scan);
4815 scan = emit_insn_after (gen_align_4 (), scan);
4817 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4818 scan = emit_label_after (lab, scan);
4819 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4820 scan);
4821 break;
4822 case DFmode:
4823 case DImode:
4824 if (need_align)
4826 need_align = false;
4827 scan = emit_label_after (gen_label_rtx (), scan);
4828 scan = emit_insn_after (gen_align_4 (), scan);
4830 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4831 scan = emit_label_after (lab, scan);
4832 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4833 scan);
4834 break;
4835 default:
4836 gcc_unreachable ();
4839 if (p->mode != HImode)
4841 for (ref = p->wend; ref; ref = ref->next)
4843 lab = ref->label;
4844 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4849 scan = emit_insn_after (gen_consttable_end (), scan);
4850 scan = emit_barrier_after (scan);
4851 pool_size = 0;
4852 pool_window_label = NULL_RTX;
4853 pool_window_last = 0;
4856 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4858 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4860 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4861 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4862 need to fix it if the input value is CONST_OK_FOR_I08. */
4864 static bool
4865 broken_move (rtx insn)
4867 if (NONJUMP_INSN_P (insn))
4869 rtx pat = PATTERN (insn);
4870 if (GET_CODE (pat) == PARALLEL)
4871 pat = XVECEXP (pat, 0, 0);
4872 if (GET_CODE (pat) == SET
4873 /* We can load any 8-bit value if we don't care what the high
4874 order bits end up as. */
4875 && GET_MODE (SET_DEST (pat)) != QImode
4876 && (CONSTANT_P (SET_SRC (pat))
4877 /* Match mova_const. */
4878 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4879 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4880 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4881 && ! (TARGET_SH2E
4882 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4883 && (fp_zero_operand (SET_SRC (pat))
4884 || fp_one_operand (SET_SRC (pat)))
4885 /* In general we don't know the current setting of fpscr, so disable fldi.
4886 There is an exception if this was a register-register move
4887 before reload - and hence it was ascertained that we have
4888 single precision setting - and in a post-reload optimization
4889 we changed this to do a constant load. In that case
4890 we don't have an r0 clobber, hence we must use fldi. */
4891 && (TARGET_FMOVD
4892 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4893 == SCRATCH))
4894 && REG_P (SET_DEST (pat))
4895 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4896 && ! (TARGET_SH2A
4897 && GET_MODE (SET_DEST (pat)) == SImode
4898 && (satisfies_constraint_I20 (SET_SRC (pat))
4899 || satisfies_constraint_I28 (SET_SRC (pat))))
4900 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4901 return true;
4904 return false;
4907 static bool
4908 mova_p (rtx insn)
4910 return (NONJUMP_INSN_P (insn)
4911 && GET_CODE (PATTERN (insn)) == SET
4912 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4913 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4914 /* Don't match mova_const. */
4915 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4918 /* Fix up a mova from a switch that went out of range. */
4919 static void
4920 fixup_mova (rtx mova)
4922 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4923 if (! flag_pic)
4925 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4926 INSN_CODE (mova) = -1;
4928 else
4930 rtx worker = mova;
4931 rtx lab = gen_label_rtx ();
4932 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4936 worker = NEXT_INSN (worker);
4937 gcc_assert (worker
4938 && !LABEL_P (worker)
4939 && !JUMP_P (worker));
4940 } while (NOTE_P (worker)
4941 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4942 wpat = PATTERN (worker);
4943 wpat0 = XVECEXP (wpat, 0, 0);
4944 wpat1 = XVECEXP (wpat, 0, 1);
4945 wsrc = SET_SRC (wpat0);
4946 PATTERN (worker) = (gen_casesi_worker_2
4947 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4948 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4949 XEXP (wpat1, 0)));
4950 INSN_CODE (worker) = -1;
4951 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4952 base = gen_rtx_LABEL_REF (Pmode, lab);
4953 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4954 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4955 INSN_CODE (mova) = -1;
4959 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4960 *num_mova, and check if the new mova is not nested within the first one.
4961 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4962 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4963 static int
4964 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4966 int n_addr = 0; /* Initialization to shut up spurious warning. */
4967 int f_target, n_target = 0; /* Likewise. */
4969 if (optimize)
4971 /* If NEW_MOVA has no address yet, it will be handled later. */
4972 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4973 return -1;
4975 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4976 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4977 if (n_addr > n_target || n_addr + 1022 < n_target)
4979 /* Change the mova into a load.
4980 broken_move will then return true for it. */
4981 fixup_mova (new_mova);
4982 return 1;
4985 if (!(*num_mova)++)
4987 *first_mova = new_mova;
4988 return 2;
4990 if (!optimize
4991 || ((f_target
4992 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4993 >= n_target))
4994 return -1;
4996 (*num_mova)--;
4997 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4998 > n_target - n_addr)
5000 fixup_mova (*first_mova);
5001 return 0;
5003 else
5005 fixup_mova (new_mova);
5006 return 1;
5010 /* Find the last barrier from insn FROM which is close enough to hold the
5011 constant pool. If we can't find one, then create one near the end of
5012 the range. */
5014 static rtx
5015 find_barrier (int num_mova, rtx mova, rtx from)
5017 int count_si = 0;
5018 int count_hi = 0;
5019 int found_hi = 0;
5020 int found_si = 0;
5021 int found_di = 0;
5022 int hi_align = 2;
5023 int si_align = 2;
5024 int leading_mova = num_mova;
5025 rtx barrier_before_mova = NULL_RTX;
5026 rtx found_barrier = NULL_RTX;
5027 rtx good_barrier = NULL_RTX;
5028 int si_limit;
5029 int hi_limit;
5030 rtx orig = from;
5031 rtx last_got = NULL_RTX;
5032 rtx last_symoff = NULL_RTX;
5034 /* For HImode: range is 510, add 4 because pc counts from address of
5035 second instruction after this one, subtract 2 for the jump instruction
5036 that we may need to emit before the table, subtract 2 for the instruction
5037 that fills the jump delay slot (in very rare cases, reorg will take an
5038 instruction from after the constant pool or will leave the delay slot
5039 empty). This gives 510.
5040 For SImode: range is 1020, add 4 because pc counts from address of
5041 second instruction after this one, subtract 2 in case pc is 2 byte
5042 aligned, subtract 2 for the jump instruction that we may need to emit
5043 before the table, subtract 2 for the instruction that fills the jump
5044 delay slot. This gives 1018. */
5046 /* The branch will always be shortened now that the reference address for
5047 forward branches is the successor address, thus we need no longer make
5048 adjustments to the [sh]i_limit for -O0. */
5050 si_limit = 1018;
5051 hi_limit = 510;
5053 while (from && count_si < si_limit && count_hi < hi_limit)
5055 int inc = get_attr_length (from);
5056 int new_align = 1;
5058 /* If this is a label that existed at the time of the compute_alignments
5059 call, determine the alignment. N.B. When find_barrier recurses for
5060 an out-of-reach mova, we might see labels at the start of previously
5061 inserted constant tables. */
5062 if (LABEL_P (from)
5063 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5065 if (optimize)
5066 new_align = 1 << label_to_alignment (from);
5067 else if (BARRIER_P (prev_nonnote_insn (from)))
5068 new_align = 1 << barrier_align (from);
5069 else
5070 new_align = 1;
5071 inc = 0;
5073 /* In case we are scanning a constant table because of recursion, check
5074 for explicit alignments. If the table is long, we might be forced
5075 to emit the new table in front of it; the length of the alignment
5076 might be the last straw. */
5077 else if (NONJUMP_INSN_P (from)
5078 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5079 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5080 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5081 /* When we find the end of a constant table, paste the new constant
5082 at the end. That is better than putting it in front because
5083 this way, we don't need extra alignment for adding a 4-byte-aligned
5084 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5085 else if (NONJUMP_INSN_P (from)
5086 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5087 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5088 return from;
5090 if (BARRIER_P (from))
5092 rtx next;
5094 found_barrier = from;
5096 /* If we are at the end of the function, or in front of an alignment
5097 instruction, we need not insert an extra alignment. We prefer
5098 this kind of barrier. */
5099 if (barrier_align (from) > 2)
5100 good_barrier = from;
5102 /* If we are at the end of a hot/cold block, dump the constants
5103 here. */
5104 next = NEXT_INSN (from);
5105 if (next
5106 && NOTE_P (next)
5107 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5108 break;
5111 if (broken_move (from))
5113 rtx pat, src, dst;
5114 enum machine_mode mode;
5116 pat = PATTERN (from);
5117 if (GET_CODE (pat) == PARALLEL)
5118 pat = XVECEXP (pat, 0, 0);
5119 src = SET_SRC (pat);
5120 dst = SET_DEST (pat);
5121 mode = GET_MODE (dst);
5123 /* GOT pc-relative setting comes in a pair of
5124 mova .L8,r0
5125 mov.l .L8,r12
5126 instructions. (plus add r0,r12).
5127 Remember if we see one without the other. */
5128 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5129 last_got = last_got ? NULL_RTX : from;
5130 else if (PIC_ADDR_P (src))
5131 last_got = last_got ? NULL_RTX : from;
5133 /* We must explicitly check the mode, because sometimes the
5134 front end will generate code to load unsigned constants into
5135 HImode targets without properly sign extending them. */
5136 if (mode == HImode
5137 || (mode == SImode && satisfies_constraint_I16 (src)
5138 && REGNO (dst) != FPUL_REG))
5140 found_hi += 2;
5141 /* We put the short constants before the long constants, so
5142 we must count the length of short constants in the range
5143 for the long constants. */
5144 /* ??? This isn't optimal, but is easy to do. */
5145 si_limit -= 2;
5147 else
5149 /* We dump DF/DI constants before SF/SI ones, because
5150 the limit is the same, but the alignment requirements
5151 are higher. We may waste up to 4 additional bytes
5152 for alignment, and the DF/DI constant may have
5153 another SF/SI constant placed before it. */
5154 if (TARGET_SHCOMPACT
5155 && ! found_di
5156 && (mode == DFmode || mode == DImode))
5158 found_di = 1;
5159 si_limit -= 8;
5161 while (si_align > 2 && found_si + si_align - 2 > count_si)
5162 si_align >>= 1;
5163 if (found_si > count_si)
5164 count_si = found_si;
5165 found_si += GET_MODE_SIZE (mode);
5166 if (num_mova)
5167 si_limit -= GET_MODE_SIZE (mode);
5171 if (mova_p (from))
5173 switch (untangle_mova (&num_mova, &mova, from))
5175 case 1:
5176 if (flag_pic)
5178 rtx src = SET_SRC (PATTERN (from));
5179 if (GET_CODE (src) == CONST
5180 && GET_CODE (XEXP (src, 0)) == UNSPEC
5181 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5182 last_symoff = from;
5184 break;
5185 case 0: return find_barrier (0, 0, mova);
5186 case 2:
5188 leading_mova = 0;
5189 barrier_before_mova
5190 = good_barrier ? good_barrier : found_barrier;
5192 default: break;
5194 if (found_si > count_si)
5195 count_si = found_si;
5197 else if (JUMP_TABLE_DATA_P (from))
5199 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5200 || (num_mova
5201 && (prev_nonnote_insn (from)
5202 == XEXP (MOVA_LABELREF (mova), 0))))
5203 num_mova--;
5204 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5206 /* We have just passed the barrier in front of the
5207 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5208 the ADDR_DIFF_VEC is accessed as data, just like our pool
5209 constants, this is a good opportunity to accommodate what
5210 we have gathered so far.
5211 If we waited any longer, we could end up at a barrier in
5212 front of code, which gives worse cache usage for separated
5213 instruction / data caches. */
5214 good_barrier = found_barrier;
5215 break;
5217 else
5219 rtx body = PATTERN (from);
5220 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5223 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5224 else if (JUMP_P (from)
5225 && ! TARGET_SH2
5226 && ! optimize_size)
5227 new_align = 4;
5229 /* There is a possibility that a bf is transformed into a bf/s by the
5230 delay slot scheduler. */
5231 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
5232 && get_attr_type (from) == TYPE_CBRANCH
5233 && ! sequence_insn_p (from))
5234 inc += 2;
5236 if (found_si)
5238 count_si += inc;
5239 if (new_align > si_align)
5241 si_limit -= (count_si - 1) & (new_align - si_align);
5242 si_align = new_align;
5244 count_si = (count_si + new_align - 1) & -new_align;
5246 if (found_hi)
5248 count_hi += inc;
5249 if (new_align > hi_align)
5251 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5252 hi_align = new_align;
5254 count_hi = (count_hi + new_align - 1) & -new_align;
5256 from = NEXT_INSN (from);
5259 if (num_mova)
5261 if (leading_mova)
5263 /* Try as we might, the leading mova is out of range. Change
5264 it into a load (which will become a pcload) and retry. */
5265 fixup_mova (mova);
5266 return find_barrier (0, 0, mova);
5268 else
5270 /* Insert the constant pool table before the mova instruction,
5271 to prevent the mova label reference from going out of range. */
5272 from = mova;
5273 good_barrier = found_barrier = barrier_before_mova;
5277 if (found_barrier)
5279 if (good_barrier && next_real_insn (found_barrier))
5280 found_barrier = good_barrier;
5282 else
5284 /* We didn't find a barrier in time to dump our stuff,
5285 so we'll make one. */
5286 rtx label = gen_label_rtx ();
5288 /* Don't emit a constant table in the middle of insns for
5289 casesi_worker_2. This is a bit overkill but is enough
5290 because casesi_worker_2 wouldn't appear so frequently. */
5291 if (last_symoff)
5292 from = last_symoff;
5294 /* If we exceeded the range, then we must back up over the last
5295 instruction we looked at. Otherwise, we just need to undo the
5296 NEXT_INSN at the end of the loop. */
5297 if (PREV_INSN (from) != orig
5298 && (count_hi > hi_limit || count_si > si_limit))
5299 from = PREV_INSN (PREV_INSN (from));
5300 else
5301 from = PREV_INSN (from);
5303 /* Don't emit a constant table in the middle of global pointer setting,
5304 since that would move the addressing base GOT into another table.
5305 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5306 in the pool anyway, so just move up the whole constant pool.
5308 However, avoid doing so when the last single GOT mov is the starting
5309 insn itself. Going back past the start insn would create a negative
5310 offset, causing errors. */
5311 if (last_got && last_got != orig)
5312 from = PREV_INSN (last_got);
5314 /* Don't insert the constant pool table at the position which
5315 may be the landing pad. */
5316 if (flag_exceptions
5317 && CALL_P (from)
5318 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5319 from = PREV_INSN (from);
5321 /* Walk back to be just before any jump or label.
5322 Putting it before a label reduces the number of times the branch
5323 around the constant pool table will be hit. Putting it before
5324 a jump makes it more likely that the bra delay slot will be
5325 filled. */
5326 while (NOTE_P (from) || JUMP_P (from)
5327 || LABEL_P (from))
5328 from = PREV_INSN (from);
5330 /* Make sure we do not split between a call and its corresponding
5331 CALL_ARG_LOCATION note. */
5332 if (CALL_P (from))
5334 rtx next = NEXT_INSN (from);
5335 if (next && NOTE_P (next)
5336 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5337 from = next;
5340 from = emit_jump_insn_after (gen_jump (label), from);
5341 JUMP_LABEL (from) = label;
5342 LABEL_NUSES (label) = 1;
5343 found_barrier = emit_barrier_after (from);
5344 emit_label_after (label, found_barrier);
5347 return found_barrier;
5350 /* If the instruction INSN is implemented by a special function, and we can
5351 positively find the register that is used to call the sfunc, and this
5352 register is not used anywhere else in this instruction - except as the
5353 destination of a set, return this register; else, return 0. */
5355 sfunc_uses_reg (rtx insn)
5357 int i;
5358 rtx pattern, part, reg_part, reg;
5360 if (!NONJUMP_INSN_P (insn))
5361 return NULL_RTX;
5362 pattern = PATTERN (insn);
5363 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5364 return NULL_RTX;
5366 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5368 part = XVECEXP (pattern, 0, i);
5369 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5370 reg_part = part;
5372 if (! reg_part)
5373 return NULL_RTX;
5374 reg = XEXP (reg_part, 0);
5375 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5377 part = XVECEXP (pattern, 0, i);
5378 if (part == reg_part || GET_CODE (part) == CLOBBER)
5379 continue;
5380 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5381 && REG_P (SET_DEST (part)))
5382 ? SET_SRC (part) : part)))
5383 return NULL_RTX;
5385 return reg;
5388 /* See if the only way in which INSN uses REG is by calling it, or by
5389 setting it while calling it. Set *SET to a SET rtx if the register
5390 is set by INSN. */
5392 static bool
5393 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5395 rtx pattern, reg2;
5397 *set = NULL_RTX;
5399 reg2 = sfunc_uses_reg (insn);
5400 if (reg2 && REGNO (reg2) == REGNO (reg))
5402 pattern = single_set (insn);
5403 if (pattern
5404 && REG_P (SET_DEST (pattern))
5405 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5406 *set = pattern;
5407 return false;
5409 if (!CALL_P (insn))
5411 /* We don't use rtx_equal_p because we don't care if the mode is
5412 different. */
5413 pattern = single_set (insn);
5414 if (pattern
5415 && REG_P (SET_DEST (pattern))
5416 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5418 rtx par, part;
5419 int i;
5421 *set = pattern;
5422 par = PATTERN (insn);
5423 if (GET_CODE (par) == PARALLEL)
5424 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5426 part = XVECEXP (par, 0, i);
5427 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5428 return true;
5430 return reg_mentioned_p (reg, SET_SRC (pattern));
5433 return true;
5436 pattern = PATTERN (insn);
5438 if (GET_CODE (pattern) == PARALLEL)
5440 int i;
5442 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5443 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5444 return true;
5445 pattern = XVECEXP (pattern, 0, 0);
5448 if (GET_CODE (pattern) == SET)
5450 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5452 /* We don't use rtx_equal_p, because we don't care if the
5453 mode is different. */
5454 if (!REG_P (SET_DEST (pattern))
5455 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5456 return true;
5458 *set = pattern;
5461 pattern = SET_SRC (pattern);
5464 if (GET_CODE (pattern) != CALL
5465 || !MEM_P (XEXP (pattern, 0))
5466 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5467 return true;
5469 return false;
5472 /* Given X, a pattern of an insn or a part of it, return a mask of used
5473 general registers. Bits 0..15 mean that the respective registers
5474 are used as inputs in the instruction. Bits 16..31 mean that the
5475 registers 0..15, respectively, are used as outputs, or are clobbered.
5476 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5478 regs_used (rtx x, int is_dest)
5480 enum rtx_code code;
5481 const char *fmt;
5482 int i, used = 0;
5484 if (! x)
5485 return used;
5486 code = GET_CODE (x);
5487 switch (code)
5489 case REG:
5490 if (REGNO (x) < 16)
5491 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5492 << (REGNO (x) + is_dest));
5493 return 0;
5494 case SUBREG:
5496 rtx y = SUBREG_REG (x);
5498 if (!REG_P (y))
5499 break;
5500 if (REGNO (y) < 16)
5501 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5502 << (REGNO (y) +
5503 subreg_regno_offset (REGNO (y),
5504 GET_MODE (y),
5505 SUBREG_BYTE (x),
5506 GET_MODE (x)) + is_dest));
5507 return 0;
5509 case SET:
5510 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5511 case RETURN:
5512 /* If there was a return value, it must have been indicated with USE. */
5513 return 0x00ffff00;
5514 case CLOBBER:
5515 is_dest = 1;
5516 break;
5517 case MEM:
5518 is_dest = 0;
5519 break;
5520 case CALL:
5521 used |= 0x00ff00f0;
5522 break;
5523 default:
5524 break;
5527 fmt = GET_RTX_FORMAT (code);
5529 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5531 if (fmt[i] == 'E')
5533 int j;
5534 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5535 used |= regs_used (XVECEXP (x, i, j), is_dest);
5537 else if (fmt[i] == 'e')
5538 used |= regs_used (XEXP (x, i), is_dest);
5540 return used;
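/* Example (illustrative): for (set (reg:SI 1) (plus:SI (reg:SI 4) (reg:SI 5)))
   regs_used returns (1 << 4) | (1 << 5) for the two inputs plus
   (1 << (1 + 16)) for the output, i.e. 0x20030.  A register in a mode
   spanning two hard registers would contribute two adjacent bits
   instead of one.  */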
5543 /* Create an instruction that prevents redirection of a conditional branch
5544 to the destination of the JUMP with address ADDR.
5545 If the branch needs to be implemented as an indirect jump, try to find
5546 a scratch register for it.
5547 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5548 If any preceding insn that doesn't fit into a delay slot is good enough,
5549 pass 1. Pass 2 if a definite blocking insn is needed.
5550 -1 is used internally to avoid deep recursion.
5551 If a blocking instruction is made or recognized, return it. */
5553 static rtx
5554 gen_block_redirect (rtx jump, int addr, int need_block)
5556 int dead = 0;
5557 rtx prev = prev_nonnote_insn (jump);
5558 rtx dest;
5560 /* First, check if we already have an instruction that satisfies our need. */
5561 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5563 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5564 return prev;
5565 if (GET_CODE (PATTERN (prev)) == USE
5566 || GET_CODE (PATTERN (prev)) == CLOBBER
5567 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5568 prev = jump;
5569 else if ((need_block &= ~1) < 0)
5570 return prev;
5571 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5572 need_block = 0;
5574 if (GET_CODE (PATTERN (jump)) == RETURN)
5576 if (! need_block)
5577 return prev;
5578 /* Reorg even does nasty things with return insns that cause branches
5579 to go out of range - see find_end_label and callers. */
5580 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5582 /* We can't use JUMP_LABEL here because it might be undefined
5583 when not optimizing. */
5584 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5585 /* If the branch is out of range, try to find a scratch register for it. */
5586 if (optimize
5587 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5588 > 4092 + 4098))
5590 rtx scan;
5591 /* Don't look for the stack pointer as a scratch register,
5592 it would cause trouble if an interrupt occurred. */
5593 unsigned attempt = 0x7fff, used;
5594 int jump_left = flag_expensive_optimizations + 1;
5596 /* It is likely that the most recent eligible instruction is wanted for
5597 the delay slot. Therefore, find out which registers it uses, and
5598 try to avoid using them. */
5600 for (scan = jump; (scan = PREV_INSN (scan)); )
5602 enum rtx_code code;
5604 if (INSN_DELETED_P (scan))
5605 continue;
5606 code = GET_CODE (scan);
5607 if (code == CODE_LABEL || code == JUMP_INSN)
5608 break;
5609 if (code == INSN
5610 && GET_CODE (PATTERN (scan)) != USE
5611 && GET_CODE (PATTERN (scan)) != CLOBBER
5612 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5614 attempt &= ~regs_used (PATTERN (scan), 0);
5615 break;
5618 for (used = dead = 0, scan = JUMP_LABEL (jump);
5619 (scan = NEXT_INSN (scan)); )
5621 enum rtx_code code;
5623 if (INSN_DELETED_P (scan))
5624 continue;
5625 code = GET_CODE (scan);
5626 if (INSN_P (scan))
5628 used |= regs_used (PATTERN (scan), 0);
5629 if (code == CALL_INSN)
5630 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5631 dead |= (used >> 16) & ~used;
5632 if (dead & attempt)
5634 dead &= attempt;
5635 break;
5637 if (code == JUMP_INSN)
5639 if (jump_left-- && simplejump_p (scan))
5640 scan = JUMP_LABEL (scan);
5641 else
5642 break;
5646 /* Mask out the stack pointer again, in case it was
5647 the only 'free' register we have found. */
5648 dead &= 0x7fff;
5650 /* If the immediate destination is still in range, check for possible
5651 threading with a jump beyond the delay slot insn.
5652 Don't check if we are called recursively; the jump has been or will be
5653 checked in a different invocation then. */
5655 else if (optimize && need_block >= 0)
5657 rtx next = next_active_insn (next_active_insn (dest));
5658 if (next && JUMP_P (next)
5659 && GET_CODE (PATTERN (next)) == SET
5660 && recog_memoized (next) == CODE_FOR_jump_compact)
5662 dest = JUMP_LABEL (next);
5663 if (dest
5664 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5665 > 4092 + 4098))
5666 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5670 if (dead)
5672 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5674 /* It would be nice if we could convert the jump into an indirect
5675 jump / far branch right now, and thus expose all constituent
5676 instructions to further optimization. However, reorg uses
5677 simplejump_p to determine if there is an unconditional jump where
5678 it should try to schedule instructions from the target of the
5679 branch; simplejump_p fails for indirect jumps even if they have
5680 a JUMP_LABEL. */
5681 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5682 (reg, GEN_INT (unspec_bbr_uid++)),
5683 jump);
5684 /* ??? We would like this to have the scope of the jump, but that
5685 scope will change when a delay slot insn of an inner scope is added.
5686 Hence, after delay slot scheduling, we'll have to expect
5687 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5688 the jump. */
5690 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5691 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5692 return insn;
5694 else if (need_block)
5695 /* We can't use JUMP_LABEL here because it might be undefined
5696 when not optimizing. */
5697 return emit_insn_before (gen_block_branch_redirect
5698 (GEN_INT (unspec_bbr_uid++)),
5699 jump);
5700 return prev;
5703 #define CONDJUMP_MIN -252
5704 #define CONDJUMP_MAX 262
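/* These presumably describe how far an SH conditional branch can reach, in
   bytes relative to the branch insn; the asymmetry reflects the pc being
   read a couple of instructions past the branch.  split_branches below
   splits conditional branches whose target falls outside this window.  */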
5705 struct far_branch
5707 /* A label (to be placed) in front of the jump
5708 that jumps to our ultimate destination. */
5709 rtx near_label;
5710 /* Where we are going to insert it if we cannot move the jump any farther,
5711 or the jump itself if we have picked up an existing jump. */
5712 rtx insert_place;
5713 /* The ultimate destination. */
5714 rtx far_label;
5715 struct far_branch *prev;
5716 /* If the branch has already been created, its address;
5717 else the address of its first prospective user. */
5718 int address;
5721 static void gen_far_branch (struct far_branch *);
5722 enum mdep_reorg_phase_e mdep_reorg_phase;
5723 static void
5724 gen_far_branch (struct far_branch *bp)
5726 rtx insn = bp->insert_place;
5727 rtx jump;
5728 rtx label = gen_label_rtx ();
5729 int ok;
5731 emit_label_after (label, insn);
5732 if (bp->far_label)
5734 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5735 LABEL_NUSES (bp->far_label)++;
5737 else
5738 jump = emit_jump_insn_after (gen_return (), insn);
5740 /* Emit a barrier so that reorg knows that any following instructions
5741 are not reachable via a fall-through path.
5742 But don't do this when not optimizing, since we wouldn't suppress the
5743 alignment for the barrier then, and could end up with out-of-range
5744 pc-relative loads. */
5745 if (optimize)
5746 emit_barrier_after (jump);
5747 emit_label_after (bp->near_label, insn);
5749 if (bp->far_label)
5750 JUMP_LABEL (jump) = bp->far_label;
5751 else
5753 rtx pat = PATTERN (jump);
5754 gcc_assert (ANY_RETURN_P (pat));
5755 JUMP_LABEL (jump) = pat;
5758 ok = invert_jump (insn, label, 1);
5759 gcc_assert (ok);
5761 /* If we are branching around a jump (rather than a return), prevent
5762 reorg from using an insn from the jump target as the delay slot insn -
5763 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5764 and it could cause branches to go out of range. */
5765 if (bp->far_label)
5766 (emit_insn_after
5767 (gen_stuff_delay_slot
5768 (GEN_INT (unspec_bbr_uid++),
5769 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5770 insn));
5771 /* Prevent reorg from undoing our splits. */
5772 gen_block_redirect (jump, bp->address += 2, 2);
5775 /* Fix up ADDR_DIFF_VECs. */
5776 void
5777 fixup_addr_diff_vecs (rtx first)
5779 rtx insn;
5781 for (insn = first; insn; insn = NEXT_INSN (insn))
5783 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5785 if (!JUMP_P (insn)
5786 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5787 continue;
5788 pat = PATTERN (insn);
5789 vec_lab = XEXP (XEXP (pat, 0), 0);
5791 /* Search the matching casesi_jump_2. */
5792 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5794 if (!JUMP_P (prev))
5795 continue;
5796 prevpat = PATTERN (prev);
5797 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5798 continue;
5799 x = XVECEXP (prevpat, 0, 1);
5800 if (GET_CODE (x) != USE)
5801 continue;
5802 x = XEXP (x, 0);
5803 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5804 break;
5806 /* FIXME: This is a bug in the optimizer, but it seems harmless
5807 to just avoid panicking. */
5808 if (!prev)
5809 continue;
5811 /* Emit the reference label of the braf where it belongs, right after
5812 the casesi_jump_2 (i.e. braf). */
5813 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5814 emit_label_after (braf_label, prev);
5816 /* Fix up the ADDR_DIFF_VEC to be relative
5817 to the reference address of the braf. */
5818 XEXP (XEXP (pat, 0), 0) = braf_label;
5822 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5823 a barrier. Return the base 2 logarithm of the desired alignment. */
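/* A return value of N requests 2**N byte alignment; so the "return 2" below
   for a jump table (ADDR_DIFF_VEC) asks for 4-byte alignment, while 0 means
   no extra alignment at all.  */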
5825 barrier_align (rtx barrier_or_label)
5827 rtx next = next_real_insn (barrier_or_label), pat, prev;
5829 if (! next)
5830 return 0;
5832 pat = PATTERN (next);
5834 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5835 return 2;
5837 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5838 /* This is a barrier in front of a constant table. */
5839 return 0;
5841 prev = prev_real_insn (barrier_or_label);
5842 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5844 pat = PATTERN (prev);
5845 /* If this is a very small table, we want to keep the alignment after
5846 the table to the minimum for proper code alignment. */
5847 return ((optimize_size
5848 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5849 <= (unsigned) 1 << (CACHE_LOG - 2)))
5850 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5853 if (optimize_size)
5854 return 0;
5856 if (! TARGET_SH2 || ! optimize)
5857 return align_jumps_log;
5859 /* When fixing up pcloads, a constant table might be inserted just before
5860 the basic block that ends with the barrier. Thus, we can't trust the
5861 instruction lengths before that. */
5862 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5864 /* Check if there is an immediately preceding branch to the insn beyond
5865 the barrier. We must weigh the cost of discarding useful information
5866 from the current cache line when executing this branch and there is
5867 an alignment, against that of fetching unneeded insns in front of the
5868 branch target when there is no alignment. */
5870 /* There are two delay_slot cases to consider. One is the simple case
5871 where the preceding branch is to the insn beyond the barrier (simple
5872 delay slot filling), and the other is where the preceding branch has
5873 a delay slot that is a duplicate of the insn after the barrier
5874 (fill_eager_delay_slots) and the branch is to the insn after the insn
5875 after the barrier. */
5877 /* PREV is presumed to be the JUMP_INSN for the barrier under
5878 investigation. Skip to the insn before it. */
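/* Roughly: the scan below walks backwards from the barrier, spending at
   most about one cache line's worth of instruction bytes (CREDIT), looking
   for a branch whose target is the insn just after the barrier; if one is
   found with enough credit left, no extra alignment is emitted.  */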
5880 int slot, credit;
5881 bool jump_to_next = false;
5883 prev = prev_real_insn (prev);
5885 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5886 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5887 prev = prev_real_insn (prev))
5889 jump_to_next = false;
5890 if (GET_CODE (PATTERN (prev)) == USE
5891 || GET_CODE (PATTERN (prev)) == CLOBBER)
5892 continue;
5893 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5895 prev = XVECEXP (PATTERN (prev), 0, 1);
5896 if (INSN_UID (prev) == INSN_UID (next))
5898 /* Delay slot was filled with insn at jump target. */
5899 jump_to_next = true;
5900 continue;
5904 if (slot &&
5905 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5906 slot = 0;
5907 credit -= get_attr_length (prev);
5909 if (prev && jump_to_label_p (prev))
5911 rtx x;
5912 if (jump_to_next
5913 || next_real_insn (JUMP_LABEL (prev)) == next
5914 /* If relax_delay_slots() decides NEXT was redundant
5915 with some previous instruction, it will have
5916 redirected PREV's jump to the following insn. */
5917 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5918 /* There is no upper bound on redundant instructions
5919 that might have been skipped, but we must not put an
5920 alignment where none had been before. */
5921 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5922 (INSN_P (x)
5923 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5924 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5925 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5927 rtx pat = PATTERN (prev);
5928 if (GET_CODE (pat) == PARALLEL)
5929 pat = XVECEXP (pat, 0, 0);
5930 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5931 return 0;
5936 return align_jumps_log;
5939 /* If we are inside a phony loop, almost any kind of label can turn up as the
5940 first one in the loop. Aligning a braf label causes incorrect switch
5941 destination addresses; we can detect braf labels because they are
5942 followed by a BARRIER.
5943 Applying loop alignment to small constant or switch tables is a waste
5944 of space, so we suppress this too. */
5946 sh_loop_align (rtx label)
5948 rtx next = label;
5950 if (! optimize || optimize_size)
5951 return 0;
5954 next = next_nonnote_insn (next);
5955 while (next && LABEL_P (next));
5957 if (! next
5958 || ! INSN_P (next)
5959 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5960 || recog_memoized (next) == CODE_FOR_consttable_2)
5961 return 0;
5963 return align_loops_log;
5966 /* Do a final pass over the function, just before delayed branch
5967 scheduling. */
5969 static void
5970 sh_reorg (void)
5972 rtx first, insn, mova = NULL_RTX;
5973 int num_mova;
5974 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5975 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5977 first = get_insns ();
5978 max_labelno_before_reorg = max_label_num ();
5980 /* We must split call insns before introducing `mova's. If we're
5981 optimizing, they'll have already been split. Otherwise, make
5982 sure we don't split them too late. */
5983 if (! optimize)
5984 split_all_insns_noflow ();
5986 if (TARGET_SHMEDIA)
5987 return;
5989 /* If relaxing, generate pseudo-ops to associate function calls with
5990 the symbols they call. It does no harm to not generate these
5991 pseudo-ops. However, when we can generate them, it enables the
5992 linker to potentially relax the jsr to a bsr, and eliminate the
5993 register load and, possibly, the constant pool entry. */
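/* Concretely, final_prescan_insn below emits a local label on the insn
   that loads the call target and a ".uses" pseudo-op naming that label on
   the call itself; those markers are what the linker relaxation works
   from.  */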
5995 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5996 if (TARGET_RELAX)
5998 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5999 own purposes. This works because none of the remaining passes
6000 need to look at them.
6002 ??? But it may break in the future. We should use a machine
6003 dependent REG_NOTE, or some other approach entirely. */
6004 for (insn = first; insn; insn = NEXT_INSN (insn))
6006 if (INSN_P (insn))
6008 rtx note;
6010 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6011 NULL_RTX)) != 0)
6012 remove_note (insn, note);
6016 for (insn = first; insn; insn = NEXT_INSN (insn))
6018 rtx pattern, reg, link, set, scan, dies, label;
6019 int rescan = 0, foundinsn = 0;
6021 if (CALL_P (insn))
6023 pattern = PATTERN (insn);
6025 if (GET_CODE (pattern) == PARALLEL)
6026 pattern = XVECEXP (pattern, 0, 0);
6027 if (GET_CODE (pattern) == SET)
6028 pattern = SET_SRC (pattern);
6030 if (GET_CODE (pattern) != CALL
6031 || !MEM_P (XEXP (pattern, 0)))
6032 continue;
6034 reg = XEXP (XEXP (pattern, 0), 0);
6036 else
6038 reg = sfunc_uses_reg (insn);
6039 if (! reg)
6040 continue;
6043 if (!REG_P (reg))
6044 continue;
6046 /* Try scanning backward to find where the register is set. */
6047 link = NULL;
6048 for (scan = PREV_INSN (insn);
6049 scan && !LABEL_P (scan);
6050 scan = PREV_INSN (scan))
6052 if (! INSN_P (scan))
6053 continue;
6055 if (! reg_mentioned_p (reg, scan))
6056 continue;
6058 if (noncall_uses_reg (reg, scan, &set))
6059 break;
6061 if (set)
6063 link = scan;
6064 break;
6068 if (! link)
6069 continue;
6071 /* The register is set at LINK. */
6073 /* We can only optimize the function call if the register is
6074 being set to a symbol. In theory, we could sometimes
6075 optimize calls to a constant location, but the assembler
6076 and linker do not support that at present. */
6077 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6078 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6079 continue;
6081 /* Scan forward from LINK to the place where REG dies, and
6082 make sure that the only insns which use REG are
6083 themselves function calls. */
6085 /* ??? This doesn't work for call targets that were allocated
6086 by reload, since there may not be a REG_DEAD note for the
6087 register. */
6089 dies = NULL_RTX;
6090 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6092 rtx scanset;
6094 /* Don't try to trace forward past a CODE_LABEL if we haven't
6095 seen INSN yet. Ordinarily, we will only find the setting insn
6096 if it is in the same basic block. However,
6097 cross-jumping can insert code labels in between the load and
6098 the call, and can result in situations where a single call
6099 insn may have two targets depending on where we came from. */
6101 if (LABEL_P (scan) && ! foundinsn)
6102 break;
6104 if (! INSN_P (scan))
6105 continue;
6107 /* Don't try to trace forward past a JUMP. To optimize
6108 safely, we would have to check that all the
6109 instructions at the jump destination did not use REG. */
6111 if (JUMP_P (scan))
6112 break;
6114 if (! reg_mentioned_p (reg, scan))
6115 continue;
6117 if (noncall_uses_reg (reg, scan, &scanset))
6118 break;
6120 if (scan == insn)
6121 foundinsn = 1;
6123 if (scan != insn
6124 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6126 /* There is a function call to this register other
6127 than the one we are checking. If we optimize
6128 this call, we need to rescan again below. */
6129 rescan = 1;
6132 /* ??? We shouldn't have to worry about SCANSET here.
6133 We should just be able to check for a REG_DEAD note
6134 on a function call. However, the REG_DEAD notes are
6135 apparently not dependable around libcalls; c-torture
6136 execute/920501-2 is a test case. If SCANSET is set,
6137 then this insn sets the register, so it must have
6138 died earlier. Unfortunately, this will only handle
6139 the cases in which the register is, in fact, set in a
6140 later insn. */
6142 /* ??? We shouldn't have to use FOUNDINSN here.
6143 This dates back to when we used LOG_LINKS to find
6144 the most recent insn which sets the register. */
6146 if (foundinsn
6147 && (scanset
6148 || find_reg_note (scan, REG_DEAD, reg)))
6150 dies = scan;
6151 break;
6155 if (! dies)
6157 /* Either there was a branch, or some insn used REG
6158 other than as a function call address. */
6159 continue;
6162 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6163 on the insn which sets the register, and on each call insn
6164 which uses the register. In final_prescan_insn we look for
6165 the REG_LABEL_OPERAND notes, and output the appropriate label
6166 or pseudo-op. */
6168 label = gen_label_rtx ();
6169 add_reg_note (link, REG_LABEL_OPERAND, label);
6170 add_reg_note (insn, REG_LABEL_OPERAND, label);
6171 if (rescan)
6173 scan = link;
6176 rtx reg2;
6178 scan = NEXT_INSN (scan);
6179 if (scan != insn
6180 && ((CALL_P (scan)
6181 && reg_mentioned_p (reg, scan))
6182 || ((reg2 = sfunc_uses_reg (scan))
6183 && REGNO (reg2) == REGNO (reg))))
6184 add_reg_note (scan, REG_LABEL_OPERAND, label);
6186 while (scan != dies);
6191 if (TARGET_SH2)
6192 fixup_addr_diff_vecs (first);
6194 if (optimize)
6196 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6197 shorten_branches (first);
6200 /* Scan the function looking for move instructions which have to be
6201 changed to pc-relative loads and insert the literal tables. */
6202 label_ref_list_pool = create_alloc_pool ("label references list",
6203 sizeof (struct label_ref_list_d),
6204 30);
6205 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6206 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6208 if (mova_p (insn))
6210 /* ??? basic block reordering can move a switch table dispatch
6211 below the switch table. Check if that has happened.
6212 We only have the addresses available when optimizing; but then,
6213 this check shouldn't be needed when not optimizing. */
6214 if (!untangle_mova (&num_mova, &mova, insn))
6216 insn = mova;
6217 num_mova = 0;
6220 else if (JUMP_P (insn)
6221 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6222 && num_mova
6223 /* ??? loop invariant motion can also move a mova out of a
6224 loop. Since loop does this code motion anyway, maybe we
6225 should wrap UNSPEC_MOVA into a CONST, so that reload can
6226 move it back. */
6227 && ((num_mova > 1
6228 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6229 || (prev_nonnote_insn (insn)
6230 == XEXP (MOVA_LABELREF (mova), 0))))
6232 rtx scan;
6233 int total;
6235 num_mova--;
6237 /* Some code might have been inserted between the mova and
6238 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6239 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6240 total += get_attr_length (scan);
6242 /* The range of mova is 1020; add 4 because the pc counts from the address
6243 of the second instruction after this one, and subtract 2 in case the pc
6244 is 2 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6245 cancels out with alignment effects of the mova itself. */
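/* That is, the limit checked below is 1020 + 4 - 2 = 1022 bytes
   between the mova and its ADDR_DIFF_VEC.  */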
6246 if (total > 1022)
6248 /* Change the mova into a load, and restart scanning
6249 there. broken_move will then return true for mova. */
6250 fixup_mova (mova);
6251 insn = mova;
6254 if (broken_move (insn)
6255 || (NONJUMP_INSN_P (insn)
6256 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6258 rtx scan;
6259 /* Scan ahead looking for a barrier to stick the constant table
6260 behind. */
6261 rtx barrier = find_barrier (num_mova, mova, insn);
6262 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6263 int need_aligned_label = 0;
6265 if (num_mova && ! mova_p (mova))
6267 /* find_barrier had to change the first mova into a
6268 pcload; thus, we have to start with this new pcload. */
6269 insn = mova;
6270 num_mova = 0;
6272 /* Now find all the moves between the points and modify them. */
6273 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6275 if (LABEL_P (scan))
6276 last_float = 0;
6277 if (NONJUMP_INSN_P (scan)
6278 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6279 need_aligned_label = 1;
6280 if (broken_move (scan))
6282 rtx *patp = &PATTERN (scan), pat = *patp;
6283 rtx src, dst;
6284 rtx lab;
6285 rtx newsrc;
6286 enum machine_mode mode;
6288 if (GET_CODE (pat) == PARALLEL)
6289 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6290 src = SET_SRC (pat);
6291 dst = SET_DEST (pat);
6292 mode = GET_MODE (dst);
6294 if (mode == SImode && satisfies_constraint_I16 (src)
6295 && REGNO (dst) != FPUL_REG)
6297 int offset = 0;
6299 mode = HImode;
6300 while (GET_CODE (dst) == SUBREG)
6302 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6303 GET_MODE (SUBREG_REG (dst)),
6304 SUBREG_BYTE (dst),
6305 GET_MODE (dst));
6306 dst = SUBREG_REG (dst);
6308 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6310 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6312 /* This must be an insn that clobbers r0. */
6313 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6314 XVECLEN (PATTERN (scan), 0)
6315 - 1);
6316 rtx clobber = *clobberp;
6318 gcc_assert (GET_CODE (clobber) == CLOBBER
6319 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6321 if (last_float
6322 && reg_set_between_p (r0_rtx, last_float_move, scan))
6323 last_float = 0;
6324 if (last_float
6325 && TARGET_SHCOMPACT
6326 && GET_MODE_SIZE (mode) != 4
6327 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6328 last_float = 0;
6329 lab = add_constant (src, mode, last_float);
6330 if (lab)
6331 emit_insn_before (gen_mova (lab), scan);
6332 else
6334 /* There will be a REG_UNUSED note for r0 on
6335 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6336 lest reorg:mark_target_live_regs will not
6337 consider r0 to be used, and we end up with a delay
6338 slot insn in front of SCAN that clobbers r0. */
6339 rtx note
6340 = find_regno_note (last_float_move, REG_UNUSED, 0);
6342 /* If we are not optimizing, then there may not be
6343 a note. */
6344 if (note)
6345 PUT_REG_NOTE_KIND (note, REG_INC);
6347 *last_float_addr = r0_inc_rtx;
6349 last_float_move = scan;
6350 last_float = src;
6351 newsrc = gen_const_mem (mode,
6352 (((TARGET_SH4 && ! TARGET_FMOVD)
6353 || REGNO (dst) == FPUL_REG)
6354 ? r0_inc_rtx
6355 : r0_rtx));
6356 last_float_addr = &XEXP (newsrc, 0);
6358 /* Remove the clobber of r0. */
6359 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6360 gen_rtx_SCRATCH (Pmode));
6362 /* This is a mova needing a label. Create it. */
6363 else if (GET_CODE (src) == UNSPEC
6364 && XINT (src, 1) == UNSPEC_MOVA
6365 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6367 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6368 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6369 newsrc = gen_rtx_UNSPEC (SImode,
6370 gen_rtvec (1, newsrc),
6371 UNSPEC_MOVA);
6373 else
6375 lab = add_constant (src, mode, 0);
6376 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6377 newsrc = gen_const_mem (mode, newsrc);
6379 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6380 INSN_CODE (scan) = -1;
6383 dump_table (need_aligned_label ? insn : 0, barrier);
6384 insn = barrier;
6387 free_alloc_pool (label_ref_list_pool);
6388 for (insn = first; insn; insn = NEXT_INSN (insn))
6389 PUT_MODE (insn, VOIDmode);
6391 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6392 INSN_ADDRESSES_FREE ();
6393 split_branches (first);
6395 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6396 also has an effect on the register that holds the address of the sfunc.
6397 Insert an extra dummy insn in front of each sfunc that pretends to
6398 use this register. */
6399 if (flag_delayed_branch)
6401 for (insn = first; insn; insn = NEXT_INSN (insn))
6403 rtx reg = sfunc_uses_reg (insn);
6405 if (! reg)
6406 continue;
6407 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6410 #if 0
6411 /* fpscr is not actually a user variable, but we pretend it is for the
6412 sake of the previous optimization passes, since we want it handled like
6413 one. However, we don't have any debugging information for it, so turn
6414 it into a non-user variable now. */
6415 if (TARGET_SH4)
6416 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6417 #endif
6418 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6422 get_dest_uid (rtx label, int max_uid)
6424 rtx dest = next_real_insn (label);
6425 int dest_uid;
6426 if (! dest)
6427 /* This can happen for an undefined label. */
6428 return 0;
6429 dest_uid = INSN_UID (dest);
6430 /* If this is a newly created branch redirection blocking instruction,
6431 we cannot index the branch_uid or insn_addresses arrays with its
6432 uid. But then, we won't need to, because the actual destination is
6433 the following branch. */
6434 while (dest_uid >= max_uid)
6436 dest = NEXT_INSN (dest);
6437 dest_uid = INSN_UID (dest);
6439 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6440 return 0;
6441 return dest_uid;
6444 /* Split condbranches that are out of range. Also add clobbers for
6445 scratch registers that are needed in far jumps.
6446 We do this before delay slot scheduling, so that it can take our
6447 newly created instructions into account. It also allows us to
6448 find branches with common targets more easily. */
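/* In outline: an out-of-range conditional branch is inverted (see
   gen_far_branch above) so that it conditionally jumps across an
   unconditional jump to the distant target; the far_branch records, indexed
   by destination uid in UID_BRANCH, let several branches to the same target
   share one such stub.  */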
6450 static void
6451 split_branches (rtx first)
6453 rtx insn;
6454 struct far_branch **uid_branch, *far_branch_list = 0;
6455 int max_uid = get_max_uid ();
6456 int ok;
6458 /* Find out which branches are out of range. */
6459 shorten_branches (first);
6461 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6462 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6464 for (insn = first; insn; insn = NEXT_INSN (insn))
6465 if (! INSN_P (insn))
6466 continue;
6467 else if (INSN_DELETED_P (insn))
6469 /* Shorten_branches would split this instruction again,
6470 so transform it into a note. */
6471 SET_INSN_DELETED (insn);
6473 else if (JUMP_P (insn)
6474 /* Don't mess with ADDR_DIFF_VEC */
6475 && (GET_CODE (PATTERN (insn)) == SET
6476 || GET_CODE (PATTERN (insn)) == RETURN))
6478 enum attr_type type = get_attr_type (insn);
6479 if (type == TYPE_CBRANCH)
6481 rtx next, beyond;
6483 if (get_attr_length (insn) > 4)
6485 rtx src = SET_SRC (PATTERN (insn));
6486 rtx olabel = XEXP (XEXP (src, 1), 0);
6487 int addr = INSN_ADDRESSES (INSN_UID (insn));
6488 rtx label = 0;
6489 int dest_uid = get_dest_uid (olabel, max_uid);
6490 struct far_branch *bp = uid_branch[dest_uid];
6492 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6493 the label if the LABEL_NUSES count drops to zero. There is
6494 always a jump_optimize pass that sets these values, but it
6495 proceeds to delete unreferenced code, and then if not
6496 optimizing, to un-delete the deleted instructions, thus
6497 leaving labels with use counts that are too low. */
6498 if (! optimize)
6500 JUMP_LABEL (insn) = olabel;
6501 LABEL_NUSES (olabel)++;
6503 if (! bp)
6505 bp = (struct far_branch *) alloca (sizeof *bp);
6506 uid_branch[dest_uid] = bp;
6507 bp->prev = far_branch_list;
6508 far_branch_list = bp;
6509 bp->far_label
6510 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6511 LABEL_NUSES (bp->far_label)++;
6513 else
6515 label = bp->near_label;
6516 if (! label && bp->address - addr >= CONDJUMP_MIN)
6518 rtx block = bp->insert_place;
6520 if (GET_CODE (PATTERN (block)) == RETURN)
6521 block = PREV_INSN (block);
6522 else
6523 block = gen_block_redirect (block,
6524 bp->address, 2);
6525 label = emit_label_after (gen_label_rtx (),
6526 PREV_INSN (block));
6527 bp->near_label = label;
6529 else if (label && ! NEXT_INSN (label))
6531 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6532 bp->insert_place = insn;
6533 else
6534 gen_far_branch (bp);
6537 if (! label
6538 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6540 bp->near_label = label = gen_label_rtx ();
6541 bp->insert_place = insn;
6542 bp->address = addr;
6544 ok = redirect_jump (insn, label, 0);
6545 gcc_assert (ok);
6547 else
6549 /* get_attr_length (insn) == 2 */
6550 /* Check if we have a pattern where reorg wants to redirect
6551 the branch to a label from an unconditional branch that
6552 is too far away. */
6553 /* We can't use JUMP_LABEL here because it might be undefined
6554 when not optimizing. */
6555 /* A syntax error might cause beyond to be NULL_RTX. */
6556 beyond
6557 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6558 0));
6560 if (beyond
6561 && (JUMP_P (beyond)
6562 || ((beyond = next_active_insn (beyond))
6563 && JUMP_P (beyond)))
6564 && GET_CODE (PATTERN (beyond)) == SET
6565 && recog_memoized (beyond) == CODE_FOR_jump_compact
6566 && ((INSN_ADDRESSES
6567 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6568 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6569 > 252 + 258 + 2))
6570 gen_block_redirect (beyond,
6571 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6574 next = next_active_insn (insn);
6576 if (next
6577 && (JUMP_P (next)
6578 || ((next = next_active_insn (next))
6579 && JUMP_P (next)))
6580 && GET_CODE (PATTERN (next)) == SET
6581 && recog_memoized (next) == CODE_FOR_jump_compact
6582 && ((INSN_ADDRESSES
6583 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6584 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6585 > 252 + 258 + 2))
6586 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6588 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6590 int addr = INSN_ADDRESSES (INSN_UID (insn));
6591 rtx far_label = 0;
6592 int dest_uid = 0;
6593 struct far_branch *bp;
6595 if (type == TYPE_JUMP)
6597 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6598 dest_uid = get_dest_uid (far_label, max_uid);
6599 if (! dest_uid)
6601 /* Parse errors can lead to labels outside
6602 the insn stream. */
6603 if (! NEXT_INSN (far_label))
6604 continue;
6606 if (! optimize)
6608 JUMP_LABEL (insn) = far_label;
6609 LABEL_NUSES (far_label)++;
6611 redirect_jump (insn, ret_rtx, 1);
6612 far_label = 0;
6615 bp = uid_branch[dest_uid];
6616 if (! bp)
6618 bp = (struct far_branch *) alloca (sizeof *bp);
6619 uid_branch[dest_uid] = bp;
6620 bp->prev = far_branch_list;
6621 far_branch_list = bp;
6622 bp->near_label = 0;
6623 bp->far_label = far_label;
6624 if (far_label)
6625 LABEL_NUSES (far_label)++;
6627 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6628 if (addr - bp->address <= CONDJUMP_MAX)
6629 emit_label_after (bp->near_label, PREV_INSN (insn));
6630 else
6632 gen_far_branch (bp);
6633 bp->near_label = 0;
6635 else
6636 bp->near_label = 0;
6637 bp->address = addr;
6638 bp->insert_place = insn;
6639 if (! far_label)
6640 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6641 else
6642 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6645 /* Generate all pending far branches,
6646 and free our references to the far labels. */
6647 while (far_branch_list)
6649 if (far_branch_list->near_label
6650 && ! NEXT_INSN (far_branch_list->near_label))
6651 gen_far_branch (far_branch_list);
6652 if (optimize
6653 && far_branch_list->far_label
6654 && ! --LABEL_NUSES (far_branch_list->far_label))
6655 delete_insn (far_branch_list->far_label);
6656 far_branch_list = far_branch_list->prev;
6659 /* Instruction length information is no longer valid due to the new
6660 instructions that have been generated. */
6661 init_insn_lengths ();
6664 /* Dump out instruction addresses, which is useful for debugging the
6665 constant pool table stuff.
6667 If relaxing, output the label and pseudo-ops used to link together
6668 calls and the instructions which set the registers. */
6670 /* ??? The addresses printed by this routine for insns are nonsense for
6671 insns which are inside of a sequence where none of the inner insns have
6672 variable length. This is because the second pass of shorten_branches
6673 does not bother to update them. */
6675 void
6676 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6677 int noperands ATTRIBUTE_UNUSED)
6679 if (TARGET_DUMPISIZE)
6680 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6682 if (TARGET_RELAX)
6684 rtx note;
6686 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6687 if (note)
6689 rtx pattern;
6691 pattern = PATTERN (insn);
6692 if (GET_CODE (pattern) == PARALLEL)
6693 pattern = XVECEXP (pattern, 0, 0);
6694 switch (GET_CODE (pattern))
6696 case SET:
6697 if (GET_CODE (SET_SRC (pattern)) != CALL
6698 && get_attr_type (insn) != TYPE_SFUNC)
6700 targetm.asm_out.internal_label
6701 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6702 break;
6704 /* else FALLTHROUGH */
6705 case CALL:
6706 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6707 CODE_LABEL_NUMBER (XEXP (note, 0)));
6708 break;
6710 default:
6711 gcc_unreachable ();
6717 /* Dump out any constants accumulated in the final pass. These will
6718 only be labels. */
6720 const char *
6721 output_jump_label_table (void)
6723 int i;
6725 if (pool_size)
6727 fprintf (asm_out_file, "\t.align 2\n");
6728 for (i = 0; i < pool_size; i++)
6730 pool_node *p = &pool_vector[i];
6732 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6733 CODE_LABEL_NUMBER (p->label));
6734 output_asm_insn (".long %O0", &p->value);
6736 pool_size = 0;
6739 return "";
6742 /* A full frame looks like:
6744 arg-5
6745 arg-4
6746 [ if current_function_anonymous_args
6747 arg-3
6748 arg-2
6749 arg-1
6750 arg-0 ]
6751 saved-fp
6752 saved-r10
6753 saved-r11
6754 saved-r12
6755 saved-pr
6756 local-n
6758 local-1
6759 local-0 <- fp points here. */
6761 /* Number of bytes pushed for anonymous args, used to pass information
6762 between expand_prologue and expand_epilogue. */
6764 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6765 adjusted. If EPILOGUE_P is zero, this is for a prologue; otherwise, it's
6766 for an epilogue and a negative value means that it's for a sibcall
6767 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6768 all the registers that are about to be restored, and hence dead. */
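/* In the simple case the whole adjustment fits into one add-immediate and a
   single insn is emitted; e.g. (illustrative call, mirroring the prologue
   use below) output_stack_adjust (-8, stack_pointer_rtx, 0, NULL, true)
   becomes something like "add #-8,r15".  The other paths handle constants
   that do not fit.  */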
6770 static void
6771 output_stack_adjust (int size, rtx reg, int epilogue_p,
6772 HARD_REG_SET *live_regs_mask, bool frame_p)
6774 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6775 if (size)
6777 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6779 /* This test is bogus, as output_stack_adjust is used to re-align the
6780 stack. */
6781 #if 0
6782 gcc_assert (!(size % align));
6783 #endif
6785 if (CONST_OK_FOR_ADD (size))
6786 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6787 /* Try to do it with two partial adjustments; however, we must make
6788 sure that the stack is properly aligned at all times, in case
6789 an interrupt occurs between the two partial adjustments. */
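/* Illustrative numbers, assuming the usual 8-bit signed add immediate: for
   SIZE == 200 with a 4 byte boundary the single constant is out of range,
   but 200 / 2 & -4 == 100 and the remaining 100 both fit, and the stack
   stays 4-byte aligned between the two additions.  */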
6790 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6791 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6793 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6794 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6796 else
6798 rtx const_reg;
6799 rtx insn;
6800 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6801 int i;
6803 /* If TEMP is invalid, we could temporarily save a general
6804 register to MACL. However, there is currently no need
6805 to handle this case, so just die when we see it. */
6806 if (epilogue_p < 0
6807 || current_function_interrupt
6808 || ! call_really_used_regs[temp] || fixed_regs[temp])
6809 temp = -1;
6810 if (temp < 0 && ! current_function_interrupt
6811 && (TARGET_SHMEDIA || epilogue_p >= 0))
6813 HARD_REG_SET temps;
6814 COPY_HARD_REG_SET (temps, call_used_reg_set);
6815 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6816 if (epilogue_p > 0)
6818 int nreg = 0;
6819 if (crtl->return_rtx)
6821 enum machine_mode mode;
6822 mode = GET_MODE (crtl->return_rtx);
6823 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6824 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6826 for (i = 0; i < nreg; i++)
6827 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6828 if (crtl->calls_eh_return)
6830 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6831 for (i = 0; i <= 3; i++)
6832 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6835 if (TARGET_SHMEDIA && epilogue_p < 0)
6836 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6837 CLEAR_HARD_REG_BIT (temps, i);
6838 if (epilogue_p <= 0)
6840 for (i = FIRST_PARM_REG;
6841 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6842 CLEAR_HARD_REG_BIT (temps, i);
6843 if (cfun->static_chain_decl != NULL)
6844 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6846 temp = scavenge_reg (&temps);
6848 if (temp < 0 && live_regs_mask)
6850 HARD_REG_SET temps;
6852 COPY_HARD_REG_SET (temps, *live_regs_mask);
6853 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6854 temp = scavenge_reg (&temps);
6856 if (temp < 0)
6858 rtx adj_reg, tmp_reg, mem;
6860 /* If we reached here, the most likely case is the (sibcall)
6861 epilogue for non-SHmedia. Put a special push/pop sequence
6862 for such a case as the last resort. This looks lengthy but
6863 would not be a problem because it seems to be very
6864 rare. */
6866 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6869 /* ??? There is still the slight possibility that r4 or
6870 r5 have been reserved as fixed registers or assigned
6871 as global registers, and they change during an
6872 interrupt. There are possible ways to handle this:
6874 - If we are adjusting the frame pointer (r14), we can do
6875 with a single temp register and an ordinary push / pop
6876 on the stack.
6877 - Grab any call-used or call-saved registers (i.e. not
6878 fixed or globals) for the temps we need. We might
6879 also grab r14 if we are adjusting the stack pointer.
6880 If we can't find enough available registers, issue
6881 a diagnostic and die - the user must have reserved
6882 way too many registers.
6883 But since all this is rather unlikely to happen and
6884 would require extra testing, we just die if r4 / r5
6885 are not available. */
6886 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6887 && !global_regs[4] && !global_regs[5]);
6889 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6890 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6891 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6892 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6893 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6894 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6895 emit_move_insn (mem, tmp_reg);
6896 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6897 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6898 emit_move_insn (mem, tmp_reg);
6899 emit_move_insn (reg, adj_reg);
6900 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6901 emit_move_insn (adj_reg, mem);
6902 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6903 emit_move_insn (tmp_reg, mem);
6904 /* Tell flow the insns that pop r4/r5 aren't dead. */
6905 emit_use (tmp_reg);
6906 emit_use (adj_reg);
6907 return;
6909 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6911 /* If SIZE is negative, subtract the positive value.
6912 This sometimes allows a constant pool entry to be shared
6913 between prologue and epilogue code. */
6914 if (size < 0)
6916 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6917 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6919 else
6921 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6922 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6924 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6925 gen_rtx_SET (VOIDmode, reg,
6926 gen_rtx_PLUS (SImode, reg,
6927 GEN_INT (size))));
6932 static rtx
6933 frame_insn (rtx x)
6935 x = emit_insn (x);
6936 RTX_FRAME_RELATED_P (x) = 1;
6937 return x;
6940 /* Output RTL to push register RN onto the stack. */
6942 static rtx
6943 push (int rn)
6945 rtx x;
6946 if (rn == FPUL_REG)
6947 x = gen_push_fpul ();
6948 else if (rn == FPSCR_REG)
6949 x = gen_push_fpscr ();
6950 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6951 && FP_OR_XD_REGISTER_P (rn))
6953 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6954 return NULL_RTX;
6955 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6957 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6958 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6959 else
6960 x = gen_push (gen_rtx_REG (SImode, rn));
6962 x = frame_insn (x);
6963 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6964 return x;
6967 /* Output RTL to pop register RN from the stack. */
6969 static void
6970 pop (int rn)
6972 rtx x, sp_reg, reg;
6973 if (rn == FPUL_REG)
6974 x = gen_pop_fpul ();
6975 else if (rn == FPSCR_REG)
6976 x = gen_pop_fpscr ();
6977 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
6978 && FP_OR_XD_REGISTER_P (rn))
6980 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6981 return;
6982 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6984 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6985 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6986 else
6987 x = gen_pop (gen_rtx_REG (SImode, rn));
6989 x = emit_insn (x);
6991 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6992 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6993 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6994 : SET_DEST (PATTERN (x)));
6995 add_reg_note (x, REG_CFA_RESTORE, reg);
6996 add_reg_note (x, REG_CFA_ADJUST_CFA,
6997 gen_rtx_SET (SImode, sp_reg,
6998 plus_constant (SImode, sp_reg,
6999 GET_MODE_SIZE (GET_MODE (reg)))));
7000 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7001 RTX_FRAME_RELATED_P (x) = 1;
7004 /* Generate code to push the regs specified in the mask. */
7006 static void
7007 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7009 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7010 int skip_fpscr = 0;
7012 /* Push PR last; this gives better latencies after the prologue, and
7013 candidates for the return delay slot when there are no general
7014 registers pushed. */
7015 for (; i < FIRST_PSEUDO_REGISTER; i++)
7017 /* If this is an interrupt handler, and the SZ bit varies,
7018 and we have to push any floating point register, we need
7019 to switch to the correct precision first. */
7020 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7021 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7023 HARD_REG_SET unsaved;
7025 push (FPSCR_REG);
7026 COMPL_HARD_REG_SET (unsaved, *mask);
7027 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7028 skip_fpscr = 1;
7030 if (i != PR_REG
7031 && (i != FPSCR_REG || ! skip_fpscr)
7032 && TEST_HARD_REG_BIT (*mask, i))
7034 /* If the ISR has RESBANK attribute assigned, don't push any of
7035 the following registers - R0-R14, MACH, MACL and GBR. */
7036 if (! (sh_cfun_resbank_handler_p ()
7037 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7038 || i == MACH_REG
7039 || i == MACL_REG
7040 || i == GBR_REG)))
7041 push (i);
7045 /* Push banked registers last to improve delay slot opportunities. */
7046 if (interrupt_handler)
7048 bool use_movml = false;
7050 if (TARGET_SH2A)
7052 unsigned int count = 0;
7054 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7055 if (TEST_HARD_REG_BIT (*mask, i))
7056 count++;
7057 else
7058 break;
7060 /* Use movml when all banked registers are pushed. */
7061 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7062 use_movml = true;
7065 if (sh_cfun_resbank_handler_p ())
7066 ; /* Do nothing. */
7067 else if (use_movml)
7069 rtx x, mem, reg, set;
7070 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7072 /* We must avoid scheduling the multiple-store insn together with
7073 other insns. */
7074 emit_insn (gen_blockage ());
7075 x = gen_movml_push_banked (sp_reg);
7076 x = frame_insn (x);
7077 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7079 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7080 reg = gen_rtx_REG (SImode, i);
7081 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7084 set = gen_rtx_SET (SImode, sp_reg,
7085 plus_constant (Pmode, sp_reg, - 32));
7086 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7087 emit_insn (gen_blockage ());
7089 else
7090 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7091 if (TEST_HARD_REG_BIT (*mask, i))
7092 push (i);
7095 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7096 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7097 push (PR_REG);
7100 /* Calculate how much extra space is needed to save all callee-saved
7101 target registers.
7102 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7104 static int
7105 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7107 int reg;
7108 int stack_space = 0;
7109 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7111 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7112 if ((! call_really_used_regs[reg] || interrupt_handler)
7113 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7114 /* Leave space to save this target register on the stack,
7115 in case target register allocation wants to use it. */
7116 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7117 return stack_space;
7120 /* Decide whether we should reserve space for callee-save target registers,
7121 in case target register allocation wants to use them. REGS_SAVED is
7122 the space, in bytes, that is already required for register saves.
7123 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7125 static int
7126 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7127 HARD_REG_SET *live_regs_mask)
7129 if (optimize_size)
7130 return 0;
7131 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7134 /* Decide how much space to reserve for callee-save target registers
7135 in case target register allocation wants to use them.
7136 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7138 static int
7139 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7141 if (shmedia_space_reserved_for_target_registers)
7142 return shmedia_target_regs_stack_space (live_regs_mask);
7143 else
7144 return 0;
7147 /* Work out the registers which need to be saved, both as a mask and a
7148 count of saved words. Return the count.
7150 If doing a pragma interrupt function, then push all regs used by the
7151 function, and if we call another function (we can tell by looking at PR),
7152 make sure that all the regs it clobbers are safe too. */
7154 static int
7155 calc_live_regs (HARD_REG_SET *live_regs_mask)
7157 unsigned int reg;
7158 int count;
7159 tree attrs;
7160 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7161 bool nosave_low_regs;
7162 int pr_live, has_call;
7164 attrs = DECL_ATTRIBUTES (current_function_decl);
7165 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7166 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7167 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7168 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7170 CLEAR_HARD_REG_SET (*live_regs_mask);
7171 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7172 && df_regs_ever_live_p (FPSCR_REG))
7173 target_flags &= ~MASK_FPU_SINGLE;
7174 /* If we can avoid a lot of save insns by switching to double mode, do that. */
7175 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
7176 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7177 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7178 && (! call_really_used_regs[reg]
7179 || interrupt_handler)
7180 && ++count > 2)
7182 target_flags &= ~MASK_FPU_SINGLE;
7183 break;
7185 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7186 knows how to use it. That means the pseudo originally allocated for
7187 the initial value can become the PR_MEDIA_REG hard register, as seen for
7188 execute/20010122-1.c:test9. */
7189 if (TARGET_SHMEDIA)
7190 /* ??? this function is called from initial_elimination_offset, hence we
7191 can't use the result of sh_media_register_for_return here. */
7192 pr_live = sh_pr_n_sets ();
7193 else
7195 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7196 pr_live = (pr_initial
7197 ? (!REG_P (pr_initial)
7198 || REGNO (pr_initial) != (PR_REG))
7199 : df_regs_ever_live_p (PR_REG));
7200 /* For SHcompact, if not optimizing, we end up with a memory reference
7201 using the return address pointer for __builtin_return_address even
7202 though there is no actual need to put the PR register on the stack. */
7203 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7205 /* Force PR to be live if the prologue has to call the SHmedia
7206 argument decoder or register saver. */
7207 if (TARGET_SHCOMPACT
7208 && ((crtl->args.info.call_cookie
7209 & ~ CALL_COOKIE_RET_TRAMP (1))
7210 || crtl->saves_all_registers))
7211 pr_live = 1;
7212 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7213 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7215 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7216 ? pr_live
7217 : interrupt_handler
7218 ? (/* Need to save all the regs ever live. */
7219 (df_regs_ever_live_p (reg)
7220 || (call_really_used_regs[reg]
7221 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7222 || reg == PIC_OFFSET_TABLE_REGNUM)
7223 && has_call)
7224 || (TARGET_SHMEDIA && has_call
7225 && REGISTER_NATURAL_MODE (reg) == SImode
7226 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7227 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7228 && reg != RETURN_ADDRESS_POINTER_REGNUM
7229 && reg != T_REG && reg != GBR_REG
7230 /* Push fpscr only on targets which have an FPU. */
7231 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7232 : (/* Only push those regs which are used and need to be saved. */
7233 (TARGET_SHCOMPACT
7234 && flag_pic
7235 && crtl->args.info.call_cookie
7236 && reg == PIC_OFFSET_TABLE_REGNUM)
7237 || (df_regs_ever_live_p (reg)
7238 && ((!call_really_used_regs[reg]
7239 && !(reg != PIC_OFFSET_TABLE_REGNUM
7240 && fixed_regs[reg] && call_used_regs[reg]))
7241 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7242 || (crtl->calls_eh_return
7243 && (reg == EH_RETURN_DATA_REGNO (0)
7244 || reg == EH_RETURN_DATA_REGNO (1)
7245 || reg == EH_RETURN_DATA_REGNO (2)
7246 || reg == EH_RETURN_DATA_REGNO (3)))
7247 || ((reg == MACL_REG || reg == MACH_REG)
7248 && df_regs_ever_live_p (reg)
7249 && sh_cfun_attr_renesas_p ())
7252 SET_HARD_REG_BIT (*live_regs_mask, reg);
7253 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7255 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7256 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7258 if (FP_REGISTER_P (reg))
7260 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7262 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7263 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7266 else if (XD_REGISTER_P (reg))
7268 /* Must switch to double mode to access these registers. */
7269 target_flags &= ~MASK_FPU_SINGLE;
7273 if (nosave_low_regs && reg == R8_REG)
7274 break;
7276 /* If we have a target register optimization pass after prologue / epilogue
7277 threading, we need to assume all target registers will be live even if
7278 they aren't now. */
7279 if (flag_branch_target_load_optimize2
7280 && TARGET_SAVE_ALL_TARGET_REGS
7281 && shmedia_space_reserved_for_target_registers)
7282 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7283 if ((! call_really_used_regs[reg] || interrupt_handler)
7284 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7286 SET_HARD_REG_BIT (*live_regs_mask, reg);
7287 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7289 /* If this is an interrupt handler, we don't have any call-clobbered
7290 registers we can conveniently use for target register save/restore.
7291 Make sure we save at least one general purpose register when we need
7292 to save target registers. */
7293 if (interrupt_handler
7294 && hard_reg_set_intersect_p (*live_regs_mask,
7295 reg_class_contents[TARGET_REGS])
7296 && ! hard_reg_set_intersect_p (*live_regs_mask,
7297 reg_class_contents[GENERAL_REGS]))
7299 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7300 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7303 return count;
7306 /* Code to generate prologue and epilogue sequences */
7308 /* PUSHED is the number of bytes that are being pushed on the
7309 stack for register saves. Return the frame size, padded
7310 appropriately so that the stack stays properly aligned. */
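/* E.g. (ignoring outgoing args) with a frame size of 10, PUSHED == 12 and a
   4 byte boundary this yields ((10 + 12 + 3) & -4) - 12 == 12, so the locals
   get two bytes of padding and frame plus register saves stay a multiple of
   the stack boundary.  */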
7311 static HOST_WIDE_INT
7312 rounded_frame_size (int pushed)
7314 HOST_WIDE_INT size = get_frame_size ();
7315 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7317 if (ACCUMULATE_OUTGOING_ARGS)
7318 size += crtl->outgoing_args_size;
7320 return ((size + pushed + align - 1) & -align) - pushed;
7323 /* Choose a call-clobbered target-branch register that remains
7324 unchanged along the whole function. We set it up as the return
7325 value in the prologue. */
7327 sh_media_register_for_return (void)
7329 int regno;
7330 int tr0_used;
7332 if (! crtl->is_leaf)
7333 return -1;
7334 if (lookup_attribute ("interrupt_handler",
7335 DECL_ATTRIBUTES (current_function_decl)))
7336 return -1;
7337 if (sh_cfun_interrupt_handler_p ())
7338 return -1;
7340 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7342 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7343 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7344 return regno;
7346 return -1;
7349 /* The maximum number of registers we need to save is:
7350 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7351 - 32 floating point registers (for each pair, we save none,
7352 one single precision value, or a double precision value).
7353 - 8 target registers
7354 - add 1 entry for a delimiter. */
7355 #define MAX_SAVED_REGS (62+32+8)
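/* That is 102 slots; the save_schedule below allocates MAX_SAVED_REGS + 2
   entries so that the start and end delimiters described in its comment also
   fit.  */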
7357 typedef struct save_entry_s
7359 unsigned char reg;
7360 unsigned char mode;
7361 short offset;
7362 } save_entry;
7364 #define MAX_TEMPS 4
7366 /* There will be a delimiter entry with VOIDmode both at the start and the
7367 end of a filled in schedule. The end delimiter has the offset of the
7368 save with the smallest (i.e. most negative) offset. */
7369 typedef struct save_schedule_s
7371 save_entry entries[MAX_SAVED_REGS + 2];
7372 int temps[MAX_TEMPS+1];
7373 } save_schedule;
7375 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7376 use reverse order. Returns the last entry written to (not counting
7377 the delimiter). OFFSET_BASE is a number to be added to all offset
7378 entries. */
7380 static save_entry *
7381 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7382 int offset_base)
7384 int align, i;
7385 save_entry *entry = schedule->entries;
7386 int tmpx = 0;
7387 int offset;
7389 if (! current_function_interrupt)
7390 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7391 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7392 && ! FUNCTION_ARG_REGNO_P (i)
7393 && i != FIRST_RET_REG
7394 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7395 && ! (crtl->calls_eh_return
7396 && (i == EH_RETURN_STACKADJ_REGNO
7397 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7398 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7399 schedule->temps[tmpx++] = i;
7400 entry->reg = -1;
7401 entry->mode = VOIDmode;
7402 entry->offset = offset_base;
7403 entry++;
7404 /* We loop twice: first, we save 8-byte aligned registers at the
7405 higher addresses, which are known to be aligned. Then, we
7406 proceed to saving 32-bit registers that don't need 8-byte
7407 alignment.
7408 If this is an interrupt function, all registers that need saving
7409 need to be saved in full. Moreover, we need to postpone saving
7410 target registers till we have saved some general purpose registers
7411 we can then use as scratch registers. */
7412 offset = offset_base;
7413 for (align = 1; align >= 0; align--)
7415 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7416 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7418 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7419 int reg = i;
7421 if (current_function_interrupt)
7423 if (TARGET_REGISTER_P (i))
7424 continue;
7425 if (GENERAL_REGISTER_P (i))
7426 mode = DImode;
7428 if (mode == SFmode && (i % 2) == 1
7429 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7430 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7432 mode = DFmode;
7433 i--;
7434 reg--;
7437 /* If we're doing the aligned pass and this is not aligned,
7438 or we're doing the unaligned pass and this is aligned,
7439 skip it. */
7440 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7441 != align)
7442 continue;
7444 if (current_function_interrupt
7445 && GENERAL_REGISTER_P (i)
7446 && tmpx < MAX_TEMPS)
7447 schedule->temps[tmpx++] = i;
7449 offset -= GET_MODE_SIZE (mode);
7450 entry->reg = i;
7451 entry->mode = mode;
7452 entry->offset = offset;
7453 entry++;
7455 if (align && current_function_interrupt)
7456 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7457 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7459 offset -= GET_MODE_SIZE (DImode);
7460 entry->reg = i;
7461 entry->mode = DImode;
7462 entry->offset = offset;
7463 entry++;
7466 entry->reg = -1;
7467 entry->mode = VOIDmode;
7468 entry->offset = offset;
7469 schedule->temps[tmpx] = -1;
7470 return entry - 1;
7473 void
7474 sh_expand_prologue (void)
7476 HARD_REG_SET live_regs_mask;
7477 int d, i;
7478 int d_rounding = 0;
7479 int save_flags = target_flags;
7480 int pretend_args;
7481 int stack_usage;
7482 tree sp_switch_attr
7483 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7485 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7487 /* We have pretend args if we had an object sent partially in registers
7488 and partially on the stack, e.g. a large structure. */
7489 pretend_args = crtl->args.pretend_args_size;
7490 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7491 && (NPARM_REGS(SImode)
7492 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7493 pretend_args = 0;
7495 output_stack_adjust (-pretend_args
7496 - crtl->args.info.stack_regs * 8,
7497 stack_pointer_rtx, 0, NULL, true);
7498 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7500 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7501 /* We're going to use the PIC register to load the address of the
7502 incoming-argument decoder and/or of the return trampoline from
7503 the GOT, so make sure the PIC register is preserved and
7504 initialized. */
7505 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7507 if (TARGET_SHCOMPACT
7508 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7510 int reg;
7512 /* First, make all registers with incoming arguments that will
7513 be pushed onto the stack live, so that register renaming
7514 doesn't overwrite them. */
7515 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7516 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7517 >= NPARM_REGS (SImode) - reg)
7518 for (; reg < NPARM_REGS (SImode); reg++)
7519 emit_insn (gen_shcompact_preserve_incoming_args
7520 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7521 else if (CALL_COOKIE_INT_REG_GET
7522 (crtl->args.info.call_cookie, reg) == 1)
7523 emit_insn (gen_shcompact_preserve_incoming_args
7524 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7526 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7527 stack_pointer_rtx);
7528 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7529 GEN_INT (crtl->args.info.call_cookie));
7530 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7531 gen_rtx_REG (SImode, R0_REG));
7533 else if (TARGET_SHMEDIA)
7535 int tr = sh_media_register_for_return ();
7537 if (tr >= 0)
7538 emit_move_insn (gen_rtx_REG (DImode, tr),
7539 gen_rtx_REG (DImode, PR_MEDIA_REG));
7542 /* Emit the code for SETUP_VARARGS. */
7543 if (cfun->stdarg)
7545 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7548 /* Push arg regs as if they'd been provided by the caller on the stack. */
7548 for (i = 0; i < NPARM_REGS(SImode); i++)
7550 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7552 if (i >= (NPARM_REGS(SImode)
7553 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7555 break;
7556 push (rn);
7557 stack_usage += GET_MODE_SIZE (SImode);
7562 /* If we're supposed to switch stacks at function entry, do so now. */
7563 if (sp_switch_attr)
7565 rtx lab, newsrc;
7566 /* The argument specifies a variable holding the address of the
7567 stack the interrupt function should switch to/from at entry/exit. */
7568 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7569 const char *s
7570 = ggc_strdup (TREE_STRING_POINTER (arg));
7571 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7573 lab = add_constant (sp_switch, SImode, 0);
7574 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7575 newsrc = gen_const_mem (SImode, newsrc);
7577 emit_insn (gen_sp_switch_1 (newsrc));
7580 d = calc_live_regs (&live_regs_mask);
7581 /* ??? Maybe we could save some switching if we can move a mode switch
7582 that already happens to be at the function start into the prologue. */
7583 if (target_flags != save_flags && ! current_function_interrupt)
7584 emit_insn (gen_toggle_sz ());
7586 if (TARGET_SH5)
7588 int offset_base, offset;
7589 rtx r0 = NULL_RTX;
7590 int offset_in_r0 = -1;
7591 int sp_in_r0 = 0;
7592 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7593 int total_size, save_size;
7594 save_schedule schedule;
7595 save_entry *entry;
7596 int *tmp_pnt;
7598 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7599 && ! current_function_interrupt)
7600 r0 = gen_rtx_REG (Pmode, R0_REG);
7602 /* D is the actual number of bytes that we need for saving registers;
7603 however, in initial_elimination_offset we have committed to using
7604 an additional TREGS_SPACE amount of bytes. In order to keep both
7605 addresses to arguments supplied by the caller and local variables
7606 valid, we must keep this gap. Place it between the incoming
7607 arguments and the actually saved registers in a bid to optimize
7608 locality of reference. */
7609 total_size = d + tregs_space;
7610 total_size += rounded_frame_size (total_size);
7611 save_size = total_size - rounded_frame_size (d);
7612 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7613 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7614 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
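/* A worked example (illustrative numbers only): if STACK_BOUNDARY /
   BITS_PER_UNIT is 8 and save_size is 20, then d_rounding becomes
   8 - 20 % 8 = 4, padding the save area up to 24 bytes.  */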
7616 /* If adjusting the stack in a single step costs nothing extra, do so.
7617 I.e. either if a single addi is enough, or we need a movi anyway,
7618 and we don't exceed the maximum offset range (the test for the
7619 latter is conservative for simplicity). */
7620 if (TARGET_SHMEDIA
7621 && (CONST_OK_FOR_I10 (-total_size)
7622 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7623 && total_size <= 2044)))
7624 d_rounding = total_size - save_size;
7626 offset_base = d + d_rounding;
7628 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7629 0, NULL, true);
7630 stack_usage += save_size + d_rounding;
7632 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7633 tmp_pnt = schedule.temps;
7634 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7636 enum machine_mode mode = (enum machine_mode) entry->mode;
7637 unsigned int reg = entry->reg;
7638 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7639 rtx orig_reg_rtx;
7641 offset = entry->offset;
7643 reg_rtx = gen_rtx_REG (mode, reg);
7645 mem_rtx = gen_frame_mem (mode,
7646 gen_rtx_PLUS (Pmode,
7647 stack_pointer_rtx,
7648 GEN_INT (offset)));
7650 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7652 gcc_assert (r0);
7653 mem_rtx = NULL_RTX;
7656 if (HAVE_PRE_DECREMENT
7657 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7658 || mem_rtx == NULL_RTX
7659 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7661 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7663 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7664 pre_dec = NULL_RTX;
7665 else
7667 mem_rtx = NULL_RTX;
7668 offset += GET_MODE_SIZE (mode);
7672 if (mem_rtx != NULL_RTX)
7673 goto addr_ok;
7675 if (offset_in_r0 == -1)
7677 emit_move_insn (r0, GEN_INT (offset));
7678 offset_in_r0 = offset;
7680 else if (offset != offset_in_r0)
7682 emit_move_insn (r0,
7683 gen_rtx_PLUS
7684 (Pmode, r0,
7685 GEN_INT (offset - offset_in_r0)));
7686 offset_in_r0 += offset - offset_in_r0;
7689 if (pre_dec != NULL_RTX)
7691 if (! sp_in_r0)
7693 emit_move_insn (r0,
7694 gen_rtx_PLUS
7695 (Pmode, r0, stack_pointer_rtx));
7696 sp_in_r0 = 1;
7699 offset -= GET_MODE_SIZE (mode);
7700 offset_in_r0 -= GET_MODE_SIZE (mode);
7702 mem_rtx = pre_dec;
7704 else if (sp_in_r0)
7705 mem_rtx = gen_frame_mem (mode, r0);
7706 else
7707 mem_rtx = gen_frame_mem (mode,
7708 gen_rtx_PLUS (Pmode,
7709 stack_pointer_rtx,
7710 r0));
7712 /* We must not use an r0-based address for target-branch
7713 registers or for special registers without pre-dec
7714 memory addresses, since we store their values in r0
7715 first. */
7716 gcc_assert (!TARGET_REGISTER_P (reg)
7717 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7718 || mem_rtx == pre_dec));
7720 addr_ok:
7721 orig_reg_rtx = reg_rtx;
7722 if (TARGET_REGISTER_P (reg)
7723 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7724 && mem_rtx != pre_dec))
7726 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7728 emit_move_insn (tmp_reg, reg_rtx);
7730 if (REGNO (tmp_reg) == R0_REG)
7732 offset_in_r0 = -1;
7733 sp_in_r0 = 0;
7734 gcc_assert (!refers_to_regno_p
7735 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7738 if (*++tmp_pnt <= 0)
7739 tmp_pnt = schedule.temps;
7741 reg_rtx = tmp_reg;
7744 rtx insn;
7746 /* Mark as interesting for dwarf cfi generator. */
7747 insn = emit_move_insn (mem_rtx, reg_rtx);
7748 RTX_FRAME_RELATED_P (insn) = 1;
7749 /* If we use an intermediate register for the save, we can't
7750 describe this exactly in cfi as a copy of the to-be-saved
7751 register into the temporary register and then the temporary
7752 register on the stack, because the temporary register can
7753 have a different natural size than the to-be-saved register.
7754 Thus, we gloss over the intermediate copy and pretend we do
7755 a direct save from the to-be-saved register. */
7756 if (REGNO (reg_rtx) != reg)
7758 rtx set;
7760 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7761 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7764 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7766 rtx reg_rtx = gen_rtx_REG (mode, reg);
7767 rtx set;
7768 rtx mem_rtx = gen_frame_mem (mode,
7769 gen_rtx_PLUS (Pmode,
7770 stack_pointer_rtx,
7771 GEN_INT (offset)));
7773 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7774 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7779 gcc_assert (entry->offset == d_rounding);
7781 else
7783 push_regs (&live_regs_mask, current_function_interrupt);
7784 stack_usage += d;
7787 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7788 emit_insn (gen_GOTaddr2picreg ());
7790 if (SHMEDIA_REGS_STACK_ADJUST ())
7792 /* This must NOT go through the PLT, otherwise mach and macl
7793 may be clobbered. */
7794 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7795 (TARGET_FPU_ANY
7796 ? "__GCC_push_shmedia_regs"
7797 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7798 emit_insn (gen_shmedia_save_restore_regs_compact
7799 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7802 if (target_flags != save_flags && ! current_function_interrupt)
7803 emit_insn (gen_toggle_sz ());
7805 target_flags = save_flags;
7807 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7808 stack_pointer_rtx, 0, NULL, true);
7809 stack_usage += rounded_frame_size (d) - d_rounding;
7811 if (frame_pointer_needed)
7812 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7814 if (TARGET_SHCOMPACT
7815 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7817 /* This must NOT go through the PLT, otherwise mach and macl
7818 may be clobbered. */
7819 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7820 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7821 emit_insn (gen_shcompact_incoming_args ());
7824 /* If we are profiling, make sure no instructions are scheduled before
7825 the call to mcount. Similarly if some call instructions are swapped
7826 before frame related insns, it'll confuse the unwinder because
7827 currently SH has no unwind info for function epilogues. */
7828 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7829 emit_insn (gen_blockage ());
7831 if (flag_stack_usage_info)
7832 current_function_static_stack_size = stack_usage;
7835 void
7836 sh_expand_epilogue (bool sibcall_p)
7838 HARD_REG_SET live_regs_mask;
7839 int d, i;
7840 int d_rounding = 0;
7842 int save_flags = target_flags;
7843 int frame_size, save_size;
7844 int fpscr_deferred = 0;
7845 int e = sibcall_p ? -1 : 1;
7847 d = calc_live_regs (&live_regs_mask);
7849 save_size = d;
7850 frame_size = rounded_frame_size (d);
7852 if (TARGET_SH5)
7854 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7855 int total_size;
7856 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7857 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7858 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7860 total_size = d + tregs_space;
7861 total_size += rounded_frame_size (total_size);
7862 save_size = total_size - frame_size;
7864 /* If adjusting the stack in a single step costs nothing extra, do so.
7865 I.e. either if a single addi is enough, or we need a movi anyway,
7866 and we don't exceed the maximum offset range (the test for the
7867 latter is conservative for simplicity). */
7868 if (TARGET_SHMEDIA
7869 && ! frame_pointer_needed
7870 && (CONST_OK_FOR_I10 (total_size)
7871 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7872 && total_size <= 2044)))
7873 d_rounding = frame_size;
7875 frame_size -= d_rounding;
7878 if (frame_pointer_needed)
7880 /* We must avoid scheduling the epilogue with previous basic blocks.
7881 See PR/18032 and PR/40313. */
7882 emit_insn (gen_blockage ());
7883 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7884 &live_regs_mask, true);
7886 /* We must avoid moving the stack pointer adjustment past code
7887 which reads from the local frame, else an interrupt could
7888 occur after the SP adjustment and clobber data in the local
7889 frame. */
7890 emit_insn (gen_blockage ());
7891 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7893 else if (frame_size)
7895 /* We must avoid moving the stack pointer adjustment past code
7896 which reads from the local frame, else an interrupt could
7897 occur after the SP adjustment and clobber data in the local
7898 frame. */
7899 emit_insn (gen_blockage ());
7900 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7901 &live_regs_mask, true);
7904 if (SHMEDIA_REGS_STACK_ADJUST ())
7906 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7907 (TARGET_FPU_ANY
7908 ? "__GCC_pop_shmedia_regs"
7909 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7910 /* This must NOT go through the PLT, otherwise mach and macl
7911 may be clobbered. */
7912 emit_insn (gen_shmedia_save_restore_regs_compact
7913 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7916 /* Pop all the registers. */
7918 if (target_flags != save_flags && ! current_function_interrupt)
7919 emit_insn (gen_toggle_sz ());
7920 if (TARGET_SH5)
7922 int offset_base, offset;
7923 int offset_in_r0 = -1;
7924 int sp_in_r0 = 0;
7925 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7926 save_schedule schedule;
7927 save_entry *entry;
7928 int *tmp_pnt;
7930 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7931 offset_base = -entry[1].offset + d_rounding;
7932 tmp_pnt = schedule.temps;
7933 for (; entry->mode != VOIDmode; entry--)
7935 enum machine_mode mode = (enum machine_mode) entry->mode;
7936 int reg = entry->reg;
7937 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7939 offset = offset_base + entry->offset;
7940 reg_rtx = gen_rtx_REG (mode, reg);
7942 mem_rtx = gen_frame_mem (mode,
7943 gen_rtx_PLUS (Pmode,
7944 stack_pointer_rtx,
7945 GEN_INT (offset)));
7947 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7948 mem_rtx = NULL_RTX;
7950 if (HAVE_POST_INCREMENT
7951 && (offset == offset_in_r0
7952 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7953 && mem_rtx == NULL_RTX)
7954 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7956 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7958 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7959 post_inc = NULL_RTX;
7960 else
7961 mem_rtx = NULL_RTX;
7964 if (mem_rtx != NULL_RTX)
7965 goto addr_ok;
7967 if (offset_in_r0 == -1)
7969 emit_move_insn (r0, GEN_INT (offset));
7970 offset_in_r0 = offset;
7972 else if (offset != offset_in_r0)
7974 emit_move_insn (r0,
7975 gen_rtx_PLUS
7976 (Pmode, r0,
7977 GEN_INT (offset - offset_in_r0)));
7978 offset_in_r0 += offset - offset_in_r0;
7981 if (post_inc != NULL_RTX)
7983 if (! sp_in_r0)
7985 emit_move_insn (r0,
7986 gen_rtx_PLUS
7987 (Pmode, r0, stack_pointer_rtx));
7988 sp_in_r0 = 1;
7991 mem_rtx = post_inc;
7993 offset_in_r0 += GET_MODE_SIZE (mode);
7995 else if (sp_in_r0)
7996 mem_rtx = gen_frame_mem (mode, r0);
7997 else
7998 mem_rtx = gen_frame_mem (mode,
7999 gen_rtx_PLUS (Pmode,
8000 stack_pointer_rtx,
8001 r0));
8003 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8004 || mem_rtx == post_inc);
8006 addr_ok:
8007 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8008 && mem_rtx != post_inc)
8010 emit_move_insn (r0, mem_rtx);
8011 mem_rtx = r0;
8013 else if (TARGET_REGISTER_P (reg))
8015 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8017 /* Give the scheduler a bit of freedom by using up to
8018 MAX_TEMPS registers in a round-robin fashion. */
8019 emit_move_insn (tmp_reg, mem_rtx);
8020 mem_rtx = tmp_reg;
8021 if (*++tmp_pnt < 0)
8022 tmp_pnt = schedule.temps;
8025 emit_move_insn (reg_rtx, mem_rtx);
8028 gcc_assert (entry->offset + offset_base == d + d_rounding);
8030 else /* ! TARGET_SH5 */
8032 int last_reg;
8034 save_size = 0;
8035 /* For an ISR with the RESBANK attribute assigned, don't pop the PR
8036 register. */
8037 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8038 && !sh_cfun_resbank_handler_p ())
8040 if (!frame_pointer_needed)
8041 emit_insn (gen_blockage ());
8042 pop (PR_REG);
8045 /* Banked registers are popped first to avoid being scheduled in the
8046 delay slot. RTE switches banks before the ds instruction. */
8047 if (current_function_interrupt)
8049 bool use_movml = false;
8051 if (TARGET_SH2A)
8053 unsigned int count = 0;
8055 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8056 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8057 count++;
8058 else
8059 break;
8061 /* Use movml when all banked registers are popped. */
8062 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8063 use_movml = true;
8066 if (sh_cfun_resbank_handler_p ())
8067 ; /* Do nothing. */
8068 else if (use_movml)
8070 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8072 /* We must avoid scheduling the multiple-register load insn
8073 together with other insns. */
8074 emit_insn (gen_blockage ());
8075 emit_insn (gen_movml_pop_banked (sp_reg));
8076 emit_insn (gen_blockage ());
8078 else
8079 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8080 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8081 pop (i);
8083 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8085 else
8086 last_reg = FIRST_PSEUDO_REGISTER;
8088 for (i = 0; i < last_reg; i++)
8090 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8092 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8093 && hard_reg_set_intersect_p (live_regs_mask,
8094 reg_class_contents[DF_REGS]))
8095 fpscr_deferred = 1;
8096 /* For an ISR with the RESBANK attribute assigned, don't pop
8097 the following registers: R0-R14, MACH, MACL and GBR. */
8098 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8099 && ! (sh_cfun_resbank_handler_p ()
8100 && ((j >= FIRST_GENERAL_REG
8101 && j < LAST_GENERAL_REG)
8102 || j == MACH_REG
8103 || j == MACL_REG
8104 || j == GBR_REG)))
8105 pop (j);
8107 if (j == FIRST_FP_REG && fpscr_deferred)
8108 pop (FPSCR_REG);
8111 if (target_flags != save_flags && ! current_function_interrupt)
8112 emit_insn (gen_toggle_sz ());
8113 target_flags = save_flags;
8115 output_stack_adjust (crtl->args.pretend_args_size
8116 + save_size + d_rounding
8117 + crtl->args.info.stack_regs * 8,
8118 stack_pointer_rtx, e, NULL, true);
8120 if (crtl->calls_eh_return)
8121 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8122 EH_RETURN_STACKADJ_RTX));
8124 /* Switch back to the normal stack if necessary. */
8125 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8126 emit_insn (gen_sp_switch_2 ());
8128 /* Tell flow the insn that pops PR isn't dead. */
8129 /* PR_REG will never be live in SHmedia mode, and we don't need to
8130 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8131 by the return pattern. */
8132 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8133 emit_use (gen_rtx_REG (SImode, PR_REG));
8136 /* Emit code to change the current function's return address to RA.
8137 TEMP is available as a scratch register, if needed. */
8139 void
8140 sh_set_return_address (rtx ra, rtx tmp)
8142 HARD_REG_SET live_regs_mask;
8143 int d;
8144 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8145 int pr_offset;
8147 d = calc_live_regs (&live_regs_mask);
8149 /* If pr_reg isn't live, we can set it (or the register given in
8150 sh_media_register_for_return) directly. */
8151 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8153 rtx rr;
8155 if (TARGET_SHMEDIA)
8157 int rr_regno = sh_media_register_for_return ();
8159 if (rr_regno < 0)
8160 rr_regno = pr_reg;
8162 rr = gen_rtx_REG (DImode, rr_regno);
8164 else
8165 rr = gen_rtx_REG (SImode, pr_reg);
8167 emit_insn (GEN_MOV (rr, ra));
8168 /* Tell flow the register for return isn't dead. */
8169 emit_use (rr);
8170 return;
8173 if (TARGET_SH5)
8175 int offset;
8176 save_schedule schedule;
8177 save_entry *entry;
8179 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8180 offset = entry[1].offset;
8181 for (; entry->mode != VOIDmode; entry--)
8182 if (entry->reg == pr_reg)
8183 goto found;
8185 /* We can't find the PR register. */
8186 gcc_unreachable ();
8188 found:
8189 offset = entry->offset - offset;
8190 pr_offset = (rounded_frame_size (d) + offset
8191 + SHMEDIA_REGS_STACK_ADJUST ());
8193 else
8194 pr_offset = rounded_frame_size (d);
8196 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8198 if (frame_pointer_needed)
8199 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8200 else
8201 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8203 tmp = gen_frame_mem (Pmode, tmp);
8204 emit_insn (GEN_MOV (tmp, ra));
8205 /* Tell flow this store isn't dead. */
8206 emit_use (tmp);
8209 /* Clear variables at function end. */
8211 static void
8212 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8213 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8217 static rtx
8218 sh_builtin_saveregs (void)
8220 /* First unnamed integer register. */
8221 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8222 /* Number of integer registers we need to save. */
8223 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8224 /* First unnamed SFmode float reg. */
8225 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8226 /* Number of SFmode float regs to save. */
8227 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8228 rtx regbuf, fpregs;
8229 int bufsize, regno;
8230 alias_set_type alias_set;
8232 if (TARGET_SH5)
8234 if (n_intregs)
8236 int pushregs = n_intregs;
8238 while (pushregs < NPARM_REGS (SImode) - 1
8239 && (CALL_COOKIE_INT_REG_GET
8240 (crtl->args.info.call_cookie,
8241 NPARM_REGS (SImode) - pushregs)
8242 == 1))
8244 crtl->args.info.call_cookie
8245 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8246 - pushregs, 1);
8247 pushregs++;
8250 if (pushregs == NPARM_REGS (SImode))
8251 crtl->args.info.call_cookie
8252 |= (CALL_COOKIE_INT_REG (0, 1)
8253 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8254 else
8255 crtl->args.info.call_cookie
8256 |= CALL_COOKIE_STACKSEQ (pushregs);
8258 crtl->args.pretend_args_size += 8 * n_intregs;
8260 if (TARGET_SHCOMPACT)
8261 return const0_rtx;
8264 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8266 error ("__builtin_saveregs not supported by this subtarget");
8267 return const0_rtx;
8270 if (TARGET_SHMEDIA)
8271 n_floatregs = 0;
8273 /* Allocate block of memory for the regs. */
8274 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8275 Or can assign_stack_local accept a 0 SIZE argument? */
8276 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
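/* Illustrative sizing (hypothetical figures): on a 32-bit SH target with
   UNITS_PER_WORD == 4, three unnamed integer regs and two unnamed float
   regs give bufsize = 3 * 4 + 2 * 4 = 20 bytes.  */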
8278 if (TARGET_SHMEDIA)
8279 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8280 else if (n_floatregs & 1)
8282 rtx addr;
8284 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8285 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8286 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8287 regbuf = change_address (regbuf, BLKmode, addr);
8289 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8291 rtx addr, mask;
8293 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8294 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8295 XEXP (regbuf, 0), 4));
8296 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8297 emit_insn (gen_andsi3 (addr, addr, mask));
8298 regbuf = change_address (regbuf, BLKmode, addr);
8300 else
8301 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8302 alias_set = get_varargs_alias_set ();
8303 set_mem_alias_set (regbuf, alias_set);
8305 /* Save int args.
8306 This is optimized to only save the regs that are necessary. Explicitly
8307 named args need not be saved. */
8308 if (n_intregs > 0)
8309 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8310 adjust_address (regbuf, BLKmode,
8311 n_floatregs * UNITS_PER_WORD),
8312 n_intregs);
8314 if (TARGET_SHMEDIA)
8315 /* Return the address of the regbuf. */
8316 return XEXP (regbuf, 0);
8318 /* Save float args.
8319 This is optimized to only save the regs that are necessary. Explicitly
8320 named args need not be saved.
8321 We explicitly build a pointer to the buffer because it halves the insn
8322 count when not optimizing (otherwise the pointer is built for each reg
8323 saved).
8324 We emit the moves in reverse order so that we can use predecrement. */
8326 fpregs = copy_to_mode_reg (Pmode,
8327 plus_constant (Pmode, XEXP (regbuf, 0),
8328 n_floatregs * UNITS_PER_WORD));
8329 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8331 rtx mem;
8332 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8334 emit_insn (gen_addsi3 (fpregs, fpregs,
8335 GEN_INT (-2 * UNITS_PER_WORD)));
8336 mem = change_address (regbuf, DFmode, fpregs);
8337 emit_move_insn (mem,
8338 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8340 regno = first_floatreg;
8341 if (regno & 1)
8343 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8344 mem = change_address (regbuf, SFmode, fpregs);
8345 emit_move_insn (mem,
8346 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
8347 - (TARGET_LITTLE_ENDIAN != 0)));
8350 else
8351 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8353 rtx mem;
8355 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8356 mem = change_address (regbuf, SFmode, fpregs);
8357 emit_move_insn (mem,
8358 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8361 /* Return the address of the regbuf. */
8362 return XEXP (regbuf, 0);
8365 /* Define the `__builtin_va_list' type for the ABI. */
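/* The record built below corresponds roughly to

     struct __va_list_tag
     {
       void *__va_next_o;          - next integer argument slot
       void *__va_next_o_limit;    - end of the integer save area
       void *__va_next_fp;         - next floating-point argument slot
       void *__va_next_fp_limit;   - end of the floating-point save area
       void *__va_next_stack;      - remaining arguments on the stack
     };  */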
8367 static tree
8368 sh_build_builtin_va_list (void)
8370 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8371 tree record, type_decl;
8373 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8374 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8375 return ptr_type_node;
8377 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8378 type_decl = build_decl (BUILTINS_LOCATION,
8379 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8381 f_next_o = build_decl (BUILTINS_LOCATION,
8382 FIELD_DECL, get_identifier ("__va_next_o"),
8383 ptr_type_node);
8384 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8385 FIELD_DECL,
8386 get_identifier ("__va_next_o_limit"),
8387 ptr_type_node);
8388 f_next_fp = build_decl (BUILTINS_LOCATION,
8389 FIELD_DECL, get_identifier ("__va_next_fp"),
8390 ptr_type_node);
8391 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8392 FIELD_DECL,
8393 get_identifier ("__va_next_fp_limit"),
8394 ptr_type_node);
8395 f_next_stack = build_decl (BUILTINS_LOCATION,
8396 FIELD_DECL, get_identifier ("__va_next_stack"),
8397 ptr_type_node);
8399 DECL_FIELD_CONTEXT (f_next_o) = record;
8400 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8401 DECL_FIELD_CONTEXT (f_next_fp) = record;
8402 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8403 DECL_FIELD_CONTEXT (f_next_stack) = record;
8405 TYPE_STUB_DECL (record) = type_decl;
8406 TYPE_NAME (record) = type_decl;
8407 TYPE_FIELDS (record) = f_next_o;
8408 DECL_CHAIN (f_next_o) = f_next_o_limit;
8409 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8410 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8411 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8413 layout_type (record);
8415 return record;
8418 /* Implement `va_start' for varargs and stdarg. */
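/* A sketch of the register save buffer laid out by
   sh_builtin_saveregs (lowest address first):

       [unnamed FP argument regs][unnamed integer argument regs]

   next_fp/next_fp_limit bracket the FP part, next_o/next_o_limit the
   integer part, and next_stack points at the arguments the caller
   pushed on the stack.  */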
8420 static void
8421 sh_va_start (tree valist, rtx nextarg)
8423 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8424 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8425 tree t, u;
8426 int nfp, nint;
8428 if (TARGET_SH5)
8430 expand_builtin_saveregs ();
8431 std_expand_builtin_va_start (valist, nextarg);
8432 return;
8435 if ((! TARGET_SH2E && ! TARGET_SH4)
8436 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8438 std_expand_builtin_va_start (valist, nextarg);
8439 return;
8442 f_next_o = TYPE_FIELDS (va_list_type_node);
8443 f_next_o_limit = DECL_CHAIN (f_next_o);
8444 f_next_fp = DECL_CHAIN (f_next_o_limit);
8445 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8446 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8448 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8449 NULL_TREE);
8450 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8451 valist, f_next_o_limit, NULL_TREE);
8452 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8453 NULL_TREE);
8454 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8455 valist, f_next_fp_limit, NULL_TREE);
8456 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8457 valist, f_next_stack, NULL_TREE);
8459 /* Call __builtin_saveregs. */
8460 u = make_tree (sizetype, expand_builtin_saveregs ());
8461 u = fold_convert (ptr_type_node, u);
8462 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8463 TREE_SIDE_EFFECTS (t) = 1;
8464 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8466 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8467 if (nfp < 8)
8468 nfp = 8 - nfp;
8469 else
8470 nfp = 0;
8471 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8472 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8473 TREE_SIDE_EFFECTS (t) = 1;
8474 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8476 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8477 TREE_SIDE_EFFECTS (t) = 1;
8478 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8480 nint = crtl->args.info.arg_count[SH_ARG_INT];
8481 if (nint < 4)
8482 nint = 4 - nint;
8483 else
8484 nint = 0;
8485 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8486 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8487 TREE_SIDE_EFFECTS (t) = 1;
8488 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8490 u = make_tree (ptr_type_node, nextarg);
8491 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8492 TREE_SIDE_EFFECTS (t) = 1;
8493 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8496 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8497 member, return it. */
8498 static tree
8499 find_sole_member (tree type)
8501 tree field, member = NULL_TREE;
8503 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8505 if (TREE_CODE (field) != FIELD_DECL)
8506 continue;
8507 if (!DECL_SIZE (field))
8508 return NULL_TREE;
8509 if (integer_zerop (DECL_SIZE (field)))
8510 continue;
8511 if (member)
8512 return NULL_TREE;
8513 member = field;
8515 return member;
8517 /* Implement `va_arg'. */
8519 static tree
8520 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8521 gimple_seq *post_p ATTRIBUTE_UNUSED)
8523 HOST_WIDE_INT size, rsize;
8524 tree tmp, pptr_type_node;
8525 tree addr, lab_over = NULL, result = NULL;
8526 bool pass_by_ref;
8527 tree eff_type;
8529 if (!VOID_TYPE_P (type))
8530 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8531 else
8532 pass_by_ref = false;
8534 if (pass_by_ref)
8535 type = build_pointer_type (type);
8537 size = int_size_in_bytes (type);
8538 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
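/* E.g. with 4-byte words, a 6-byte argument is rounded up to
   rsize = (6 + 3) & -4 = 8 (illustrative figures).  */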
8539 pptr_type_node = build_pointer_type (ptr_type_node);
8541 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8542 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8544 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8545 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8546 int pass_as_float;
8547 tree lab_false;
8548 tree member;
8550 f_next_o = TYPE_FIELDS (va_list_type_node);
8551 f_next_o_limit = DECL_CHAIN (f_next_o);
8552 f_next_fp = DECL_CHAIN (f_next_o_limit);
8553 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8554 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8556 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8557 NULL_TREE);
8558 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8559 valist, f_next_o_limit, NULL_TREE);
8560 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8561 valist, f_next_fp, NULL_TREE);
8562 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8563 valist, f_next_fp_limit, NULL_TREE);
8564 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8565 valist, f_next_stack, NULL_TREE);
8567 /* Structures with a single member with a distinct mode are passed
8568 like their member. This is relevant if the latter has a REAL_TYPE
8569 or COMPLEX_TYPE type. */
8570 eff_type = type;
8571 while (TREE_CODE (eff_type) == RECORD_TYPE
8572 && (member = find_sole_member (eff_type))
8573 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8574 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8575 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8577 tree field_type = TREE_TYPE (member);
8579 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8580 eff_type = field_type;
8581 else
8583 gcc_assert ((TYPE_ALIGN (eff_type)
8584 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8585 || (TYPE_ALIGN (eff_type)
8586 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8587 break;
8591 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8593 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8594 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8595 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8596 && size <= 16));
8598 else
8600 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8603 addr = create_tmp_var (pptr_type_node, NULL);
8604 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8605 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8607 valist = build_simple_mem_ref (addr);
8609 if (pass_as_float)
8611 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8612 tree cmp;
8613 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8615 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8616 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8618 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8619 tmp = next_fp_limit;
8620 if (size > 4 && !is_double)
8621 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8622 tmp = build2 (GE_EXPR, boolean_type_node,
8623 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8624 cmp = build3 (COND_EXPR, void_type_node, tmp,
8625 build1 (GOTO_EXPR, void_type_node,
8626 unshare_expr (lab_false)), NULL_TREE);
8627 if (!is_double)
8628 gimplify_and_add (cmp, pre_p);
8630 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8631 || (is_double || size == 16))
8633 tmp = fold_convert (sizetype, next_fp_tmp);
8634 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8635 size_int (UNITS_PER_WORD));
8636 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8637 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8639 if (is_double)
8640 gimplify_and_add (cmp, pre_p);
8642 #ifdef FUNCTION_ARG_SCmode_WART
8643 if (TYPE_MODE (eff_type) == SCmode
8644 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8646 tree subtype = TREE_TYPE (eff_type);
8647 tree real, imag;
8649 imag
8650 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8651 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8653 real
8654 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8655 real = get_initialized_tmp_var (real, pre_p, NULL);
8657 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8658 if (type != eff_type)
8659 result = build1 (VIEW_CONVERT_EXPR, type, result);
8660 result = get_initialized_tmp_var (result, pre_p, NULL);
8662 #endif /* FUNCTION_ARG_SCmode_WART */
8664 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8665 gimplify_and_add (tmp, pre_p);
8667 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8668 gimplify_and_add (tmp, pre_p);
8670 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8671 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8672 gimplify_assign (unshare_expr (next_fp_tmp),
8673 unshare_expr (valist), pre_p);
8675 gimplify_assign (unshare_expr (valist),
8676 unshare_expr (next_fp_tmp), post_p);
8677 valist = next_fp_tmp;
8679 else
8681 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8682 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8683 unshare_expr (next_o_limit));
8684 tmp = build3 (COND_EXPR, void_type_node, tmp,
8685 build1 (GOTO_EXPR, void_type_node,
8686 unshare_expr (lab_false)),
8687 NULL_TREE);
8688 gimplify_and_add (tmp, pre_p);
8690 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8691 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8693 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8694 gimplify_and_add (tmp, pre_p);
8696 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8697 gimplify_and_add (tmp, pre_p);
8699 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8700 gimplify_assign (unshare_expr (next_o),
8701 unshare_expr (next_o_limit), pre_p);
8703 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8704 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8707 if (!result)
8709 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8710 gimplify_and_add (tmp, pre_p);
8714 /* ??? In va-sh.h, there had been code to make values larger than
8715 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8717 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8718 if (result)
8720 gimplify_assign (result, tmp, pre_p);
8721 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8722 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8723 gimplify_and_add (tmp, pre_p);
8725 else
8726 result = tmp;
8728 if (pass_by_ref)
8729 result = build_va_arg_indirect_ref (result);
8731 return result;
8734 /* 64-bit floating point memory transfers are paired single precision loads
8735 or stores. So DWARF information needs fixing in little endian (unless
8736 PR=SZ=1 in FPSCR). */
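/* E.g. in little endian, a DFmode value living in dr4 (= fr4/fr5) is
   described to DWARF as the SFmode pair fr5, fr4 by the code below.  */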
8738 sh_dwarf_register_span (rtx reg)
8740 unsigned regno = REGNO (reg);
8742 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8743 return NULL_RTX;
8745 return
8746 gen_rtx_PARALLEL (VOIDmode,
8747 gen_rtvec (2,
8748 gen_rtx_REG (SFmode, regno + 1),
8749 gen_rtx_REG (SFmode, regno)));
8752 static enum machine_mode
8753 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8754 int *punsignedp, const_tree funtype,
8755 int for_return)
8757 if (sh_promote_prototypes (funtype))
8758 return promote_mode (type, mode, punsignedp);
8759 else
8760 return default_promote_function_mode (type, mode, punsignedp, funtype,
8761 for_return);
8764 static bool
8765 sh_promote_prototypes (const_tree type)
8767 if (TARGET_HITACHI)
8768 return false;
8769 if (! type)
8770 return true;
8771 return ! sh_attr_renesas_p (type);
8774 /* Whether an argument must be passed by reference. On SHcompact, we
8775 pretend arguments wider than 32 bits that would have been passed in
8776 registers are passed by reference, so that an SHmedia trampoline
8777 loads them into the full 64-bit registers. */
8779 static int
8780 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8781 const_tree type, bool named)
8783 unsigned HOST_WIDE_INT size;
8785 if (type)
8786 size = int_size_in_bytes (type);
8787 else
8788 size = GET_MODE_SIZE (mode);
8790 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8791 && (!named
8792 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8793 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8794 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8795 && size > 4
8796 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8797 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8798 return size;
8799 else
8800 return 0;
8803 static bool
8804 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8805 const_tree type, bool named)
8807 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8809 if (targetm.calls.must_pass_in_stack (mode, type))
8810 return true;
8812 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8813 wants to know about pass-by-reference semantics for incoming
8814 arguments. */
8815 if (! cum)
8816 return false;
8818 if (TARGET_SHCOMPACT)
8820 cum->byref = shcompact_byref (cum, mode, type, named);
8821 return cum->byref != 0;
8824 return false;
8827 static bool
8828 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8829 const_tree type, bool named ATTRIBUTE_UNUSED)
8831 /* ??? How can it possibly be correct to return true only on the
8832 caller side of the equation? Is there someplace else in the
8833 sh backend that's magically producing the copies? */
8834 return (get_cumulative_args (cum)->outgoing
8835 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8836 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8839 static int
8840 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8841 tree type, bool named ATTRIBUTE_UNUSED)
8843 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8844 int words = 0;
8846 if (!TARGET_SH5
8847 && PASS_IN_REG_P (*cum, mode, type)
8848 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8849 && (ROUND_REG (*cum, mode)
8850 + (mode != BLKmode
8851 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8852 : ROUND_ADVANCE (int_size_in_bytes (type)))
8853 > NPARM_REGS (mode)))
8854 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8856 else if (!TARGET_SHCOMPACT
8857 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8858 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8860 return words * UNITS_PER_WORD;
8864 /* Define where to put the arguments to a function.
8865 Value is zero to push the argument on the stack,
8866 or a hard register in which to store the argument.
8868 MODE is the argument's machine mode.
8869 TYPE is the data type of the argument (as a tree).
8870 This is null for libcalls where that information may
8871 not be available.
8872 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8873 the preceding args and about the function being called.
8874 NAMED is nonzero if this argument is a named parameter
8875 (otherwise it is an extra parameter matching an ellipsis).
8877 On SH the first args are normally in registers
8878 and the rest are pushed. Any arg that starts within the first
8879 NPARM_REGS words is at least partially passed in a register unless
8880 its data type forbids. */
8882 static rtx
8883 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8884 const_tree type, bool named)
8886 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8888 if (! TARGET_SH5 && mode == VOIDmode)
8889 return GEN_INT (ca->renesas_abi ? 1 : 0);
8891 if (! TARGET_SH5
8892 && PASS_IN_REG_P (*ca, mode, type)
8893 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8895 int regno;
8897 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8898 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8900 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8901 gen_rtx_REG (SFmode,
8902 BASE_ARG_REG (mode)
8903 + (ROUND_REG (*ca, mode) ^ 1)),
8904 const0_rtx);
8905 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8906 gen_rtx_REG (SFmode,
8907 BASE_ARG_REG (mode)
8908 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8909 GEN_INT (4));
8910 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8913 /* If the alignment of a DF value causes an SF register to be
8914 skipped, we will use that skipped register for the next SF
8915 value. */
8916 if ((TARGET_HITACHI || ca->renesas_abi)
8917 && ca->free_single_fp_reg
8918 && mode == SFmode)
8919 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8921 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8922 ^ (mode == SFmode && TARGET_SH4
8923 && TARGET_LITTLE_ENDIAN
8924 && ! TARGET_HITACHI && ! ca->renesas_abi);
8925 return gen_rtx_REG (mode, regno);
8929 if (TARGET_SH5)
8931 if (mode == VOIDmode && TARGET_SHCOMPACT)
8932 return GEN_INT (ca->call_cookie);
8934 /* The following test assumes unnamed arguments are promoted to
8935 DFmode. */
8936 if (mode == SFmode && ca->free_single_fp_reg)
8937 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8939 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8940 && (named || ! ca->prototype_p)
8941 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8943 if (! ca->prototype_p && TARGET_SHMEDIA)
8944 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8946 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8947 FIRST_FP_PARM_REG
8948 + ca->arg_count[(int) SH_ARG_FLOAT]);
8951 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8952 && (! TARGET_SHCOMPACT
8953 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8954 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8955 type, named))))
8957 return gen_rtx_REG (mode, (FIRST_PARM_REG
8958 + ca->arg_count[(int) SH_ARG_INT]));
8961 return NULL_RTX;
8964 return NULL_RTX;
8967 /* Update the data in CUM to advance over an argument
8968 of mode MODE and data type TYPE.
8969 (TYPE is null for libcalls where that information may not be
8970 available.) */
8972 static void
8973 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8974 const_tree type, bool named)
8976 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8978 if (ca->force_mem)
8979 ca->force_mem = 0;
8980 else if (TARGET_SH5)
8982 const_tree type2 = (ca->byref && type
8983 ? TREE_TYPE (type)
8984 : type);
8985 enum machine_mode mode2 = (ca->byref && type
8986 ? TYPE_MODE (type2)
8987 : mode);
8988 int dwords = ((ca->byref
8989 ? ca->byref
8990 : mode2 == BLKmode
8991 ? int_size_in_bytes (type2)
8992 : GET_MODE_SIZE (mode2)) + 7) / 8;
8993 int numregs = MIN (dwords, NPARM_REGS (SImode)
8994 - ca->arg_count[(int) SH_ARG_INT]);
8996 if (numregs)
8998 ca->arg_count[(int) SH_ARG_INT] += numregs;
8999 if (TARGET_SHCOMPACT
9000 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9002 ca->call_cookie
9003 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9004 - numregs, 1);
9005 /* N.B. We want this also for outgoing. */
9006 ca->stack_regs += numregs;
9008 else if (ca->byref)
9010 if (! ca->outgoing)
9011 ca->stack_regs += numregs;
9012 ca->byref_regs += numregs;
9013 ca->byref = 0;
9015 ca->call_cookie
9016 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9017 - numregs, 2);
9018 while (--numregs);
9019 ca->call_cookie
9020 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9021 - 1, 1);
9023 else if (dwords > numregs)
9025 int pushregs = numregs;
9027 if (TARGET_SHCOMPACT)
9028 ca->stack_regs += numregs;
9029 while (pushregs < NPARM_REGS (SImode) - 1
9030 && (CALL_COOKIE_INT_REG_GET
9031 (ca->call_cookie,
9032 NPARM_REGS (SImode) - pushregs)
9033 == 1))
9035 ca->call_cookie
9036 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9037 - pushregs, 1);
9038 pushregs++;
9040 if (numregs == NPARM_REGS (SImode))
9041 ca->call_cookie
9042 |= CALL_COOKIE_INT_REG (0, 1)
9043 | CALL_COOKIE_STACKSEQ (numregs - 1);
9044 else
9045 ca->call_cookie
9046 |= CALL_COOKIE_STACKSEQ (numregs);
9049 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9050 && (named || ! ca->prototype_p))
9052 if (mode2 == SFmode && ca->free_single_fp_reg)
9053 ca->free_single_fp_reg = 0;
9054 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9055 < NPARM_REGS (SFmode))
9057 int numfpregs
9058 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9059 NPARM_REGS (SFmode)
9060 - ca->arg_count[(int) SH_ARG_FLOAT]);
9062 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9064 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9066 if (ca->outgoing && numregs > 0)
9069 ca->call_cookie
9070 |= (CALL_COOKIE_INT_REG
9071 (ca->arg_count[(int) SH_ARG_INT]
9072 - numregs + ((numfpregs - 2) / 2),
9073 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9074 - numfpregs) / 2));
9076 while (numfpregs -= 2);
9078 else if (mode2 == SFmode && (named)
9079 && (ca->arg_count[(int) SH_ARG_FLOAT]
9080 < NPARM_REGS (SFmode)))
9081 ca->free_single_fp_reg
9082 = FIRST_FP_PARM_REG - numfpregs
9083 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9086 return;
9089 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9091 /* Note that we've used the skipped register. */
9092 if (mode == SFmode && ca->free_single_fp_reg)
9094 ca->free_single_fp_reg = 0;
9095 return;
9097 /* When we have a DF after an SF, there's an SF register that gets
9098 skipped in order to align the DF value. We note this skipped
9099 register, because the next SF value will use it, and not the
9100 SF that follows the DF. */
9101 if (mode == DFmode
9102 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
9104 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
9105 + BASE_ARG_REG (mode));
9109 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9110 || PASS_IN_REG_P (*ca, mode, type))
9111 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9112 = (ROUND_REG (*ca, mode)
9113 + (mode == BLKmode
9114 ? ROUND_ADVANCE (int_size_in_bytes (type))
9115 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
9118 /* The Renesas calling convention doesn't quite fit into this scheme since
9119 the address is passed like an invisible argument, but one that is always
9120 passed in memory. */
9121 static rtx
9122 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9124 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9125 return NULL_RTX;
9126 return gen_rtx_REG (Pmode, 2);
9129 /* Worker function for TARGET_FUNCTION_VALUE.
9131 For the SH, this is like LIBCALL_VALUE, except that we must change the
9132 mode like PROMOTE_MODE does.
9133 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9134 tested here has to be kept in sync with the one in explow.c:promote_mode.
9137 static rtx
9138 sh_function_value (const_tree valtype,
9139 const_tree fn_decl_or_type,
9140 bool outgoing ATTRIBUTE_UNUSED)
9142 if (fn_decl_or_type
9143 && !DECL_P (fn_decl_or_type))
9144 fn_decl_or_type = NULL;
9146 return gen_rtx_REG (
9147 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9148 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9149 && (TREE_CODE (valtype) == INTEGER_TYPE
9150 || TREE_CODE (valtype) == ENUMERAL_TYPE
9151 || TREE_CODE (valtype) == BOOLEAN_TYPE
9152 || TREE_CODE (valtype) == REAL_TYPE
9153 || TREE_CODE (valtype) == OFFSET_TYPE))
9154 && sh_promote_prototypes (fn_decl_or_type)
9155 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9156 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9159 /* Worker function for TARGET_LIBCALL_VALUE. */
9161 static rtx
9162 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9164 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9167 /* Return true if N is a possible register number of function value. */
9169 static bool
9170 sh_function_value_regno_p (const unsigned int regno)
9172 return ((regno) == FIRST_RET_REG
9173 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9174 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9177 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9179 static bool
9180 sh_return_in_memory (const_tree type, const_tree fndecl)
9182 if (TARGET_SH5)
9184 if (TYPE_MODE (type) == BLKmode)
9185 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9186 else
9187 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9189 else
9191 return (TYPE_MODE (type) == BLKmode
9192 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9193 && TREE_CODE (type) == RECORD_TYPE));
9197 /* We actually emit the code in sh_expand_prologue. We used to use
9198 a static variable to flag that we need to emit this code, but that
9199 doesn't work when inlining, when functions are deferred and then emitted
9200 later. Fortunately, we already have two flags that are part of struct
9201 function that tell if a function uses varargs or stdarg. */
9202 static void
9203 sh_setup_incoming_varargs (cumulative_args_t ca,
9204 enum machine_mode mode,
9205 tree type,
9206 int *pretend_arg_size,
9207 int second_time ATTRIBUTE_UNUSED)
9209 gcc_assert (cfun->stdarg);
9210 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9212 int named_parm_regs, anon_parm_regs;
9214 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
9215 + (mode == BLKmode
9216 ? ROUND_ADVANCE (int_size_in_bytes (type))
9217 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
9218 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9219 if (anon_parm_regs > 0)
9220 *pretend_arg_size = anon_parm_regs * 4;
9224 static bool
9225 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9227 return TARGET_SH5;
9230 static bool
9231 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9233 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9235 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9239 /* Define the offset between two registers, one to be eliminated, and
9240 the other its replacement, at the start of a routine. */
9243 initial_elimination_offset (int from, int to)
9245 int regs_saved;
9246 int regs_saved_rounding = 0;
9247 int total_saved_regs_space;
9248 int total_auto_space;
9249 int save_flags = target_flags;
9250 int copy_flags;
9251 HARD_REG_SET live_regs_mask;
9253 shmedia_space_reserved_for_target_registers = false;
9254 regs_saved = calc_live_regs (&live_regs_mask);
9255 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9257 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9259 shmedia_space_reserved_for_target_registers = true;
9260 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9263 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9264 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9265 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9267 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9268 copy_flags = target_flags;
9269 target_flags = save_flags;
9271 total_saved_regs_space = regs_saved + regs_saved_rounding;
9273 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9274 return total_saved_regs_space + total_auto_space
9275 + crtl->args.info.byref_regs * 8;
9277 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9278 return total_saved_regs_space + total_auto_space
9279 + crtl->args.info.byref_regs * 8;
9281 /* Initial gap between fp and sp is 0. */
9282 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9283 return 0;
9285 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9286 return rounded_frame_size (0);
9288 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9289 return rounded_frame_size (0);
9291 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9292 && (to == HARD_FRAME_POINTER_REGNUM
9293 || to == STACK_POINTER_REGNUM));
9294 if (TARGET_SH5)
9296 int n = total_saved_regs_space;
9297 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9298 save_schedule schedule;
9299 save_entry *entry;
9301 n += total_auto_space;
9303 /* If it wasn't saved, there's not much we can do. */
9304 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9305 return n;
9307 target_flags = copy_flags;
9309 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9310 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9311 if (entry->reg == pr_reg)
9313 target_flags = save_flags;
9314 return entry->offset;
9316 gcc_unreachable ();
9318 else
9319 return total_auto_space;
9322 /* Parse the -mfixed-range= option string. */
9323 void
9324 sh_fix_range (const char *const_str)
9326 int i, first, last;
9327 char *str, *dash, *comma;
9329 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9330 REG2 are either register names or register numbers. The effect
9331 of this option is to mark the registers in the range from REG1 to
9332 REG2 as ``fixed'' so they won't be used by the compiler. */
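/* For example (an illustrative choice of registers only),
   -mfixed-range=r10-r13 marks r10 through r13 as fixed.  */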
9334 i = strlen (const_str);
9335 str = (char *) alloca (i + 1);
9336 memcpy (str, const_str, i + 1);
9338 while (1)
9340 dash = strchr (str, '-');
9341 if (!dash)
9343 warning (0, "value of -mfixed-range must have form REG1-REG2");
9344 return;
9346 *dash = '\0';
9347 comma = strchr (dash + 1, ',');
9348 if (comma)
9349 *comma = '\0';
9351 first = decode_reg_name (str);
9352 if (first < 0)
9354 warning (0, "unknown register name: %s", str);
9355 return;
9358 last = decode_reg_name (dash + 1);
9359 if (last < 0)
9361 warning (0, "unknown register name: %s", dash + 1);
9362 return;
9365 *dash = '-';
9367 if (first > last)
9369 warning (0, "%s-%s is an empty range", str, dash + 1);
9370 return;
9373 for (i = first; i <= last; ++i)
9374 fixed_regs[i] = call_used_regs[i] = 1;
9376 if (!comma)
9377 break;
9379 *comma = ',';
9380 str = comma + 1;
9384 /* Insert any deferred function attributes from earlier pragmas. */
9385 static void
9386 sh_insert_attributes (tree node, tree *attributes)
9388 tree attrs;
9390 if (TREE_CODE (node) != FUNCTION_DECL)
9391 return;
9393 /* We are only interested in fields. */
9394 if (!DECL_P (node))
9395 return;
9397 /* Append the attributes to the deferred attributes. */
9398 *sh_deferred_function_attributes_tail = *attributes;
9399 attrs = sh_deferred_function_attributes;
9400 if (!attrs)
9401 return;
9403 /* Some attributes imply or require the interrupt attribute. */
9404 if (!lookup_attribute ("interrupt_handler", attrs)
9405 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9407 /* If we have a trapa_handler, but no interrupt_handler attribute,
9408 insert an interrupt_handler attribute. */
9409 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9410 /* We can't use sh_pr_interrupt here because that's not in the
9411 java frontend. */
9412 attrs
9413 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9414 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9415 if the interrupt attribute is missing, we ignore the attribute
9416 and warn. */
9417 else if (lookup_attribute ("sp_switch", attrs)
9418 || lookup_attribute ("trap_exit", attrs)
9419 || lookup_attribute ("nosave_low_regs", attrs)
9420 || lookup_attribute ("resbank", attrs))
9422 tree *tail;
9424 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9426 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9427 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9428 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9429 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9430 warning (OPT_Wattributes,
9431 "%qE attribute only applies to interrupt functions",
9432 TREE_PURPOSE (attrs));
9433 else
9435 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9436 NULL_TREE);
9437 tail = &TREE_CHAIN (*tail);
9440 attrs = *attributes;
9444 /* Install the processed list. */
9445 *attributes = attrs;
9447 /* Clear deferred attributes. */
9448 sh_deferred_function_attributes = NULL_TREE;
9449 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9451 return;
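/* The deferred attributes handled above typically originate from
   SH-specific pragmas.  As a rough illustration,

     #pragma interrupt
     void isr (void);

   behaves much like declaring isr with
   __attribute__ ((interrupt_handler)).  */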
9454 /*------------------------------------------------------------------------------
9455 Target specific attributes
9456 Supported attributes are:
9458 * interrupt_handler
9459 Specifies this function is an interrupt handler.
9461 * trapa_handler
9462 Like interrupt_handler, but don't save all registers.
9464 * sp_switch
9465 Specifies an alternate stack for an interrupt handler to run on.
9467 * trap_exit
9468 Use a trapa to exit an interrupt function instead of rte.
9470 * nosave_low_regs
9471 Don't save r0..r7 in an interrupt handler function.
9472 This is useful on SH3* and SH4*, which have a separate set of low
9473 regs for user and privileged modes.
9474 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9475 those that run with interrupts disabled and thus can't be
9476 interrupted themselves).
9478 * renesas
9479 Use Renesas calling/layout conventions (functions and structures).
9481 * resbank
9482 In case of an interrupt handler function, use a register bank to
9483 save registers R0-R14, MACH, MACL, GBR and PR.
9484 This is available only on SH2A targets.
9486 * function_vector
9487 Declares a function to be called using the TBR relative addressing
9488 mode. Takes an argument that specifies the slot number in the table
9489 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
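/* As a rough illustration, an SH2A interrupt handler combining some of
   these attributes might be declared as

     void __attribute__ ((interrupt_handler, resbank)) isr (void);

   and a function reachable through the TBR vector table as

     void __attribute__ ((function_vector (5))) vfunc (void);

   (isr, vfunc and the slot number are made up for the example).  */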
9492 /* Handle a 'resbank' attribute. */
9493 static tree
9494 sh_handle_resbank_handler_attribute (tree * node, tree name,
9495 tree args ATTRIBUTE_UNUSED,
9496 int flags ATTRIBUTE_UNUSED,
9497 bool * no_add_attrs)
9499 if (!TARGET_SH2A)
9501 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9502 name);
9503 *no_add_attrs = true;
9505 if (TREE_CODE (*node) != FUNCTION_DECL)
9507 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9508 name);
9509 *no_add_attrs = true;
9512 return NULL_TREE;
9515 /* Handle an "interrupt_handler" attribute; arguments as in
9516 struct attribute_spec.handler. */
9517 static tree
9518 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9519 tree args ATTRIBUTE_UNUSED,
9520 int flags ATTRIBUTE_UNUSED,
9521 bool *no_add_attrs)
9523 if (TREE_CODE (*node) != FUNCTION_DECL)
9525 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9526 name);
9527 *no_add_attrs = true;
9529 else if (TARGET_SHCOMPACT)
9531 error ("attribute interrupt_handler is not compatible with -m5-compact");
9532 *no_add_attrs = true;
9535 return NULL_TREE;
9538 /* Handle a 'function_vector' attribute; arguments as in
9539 struct attribute_spec.handler. */
9540 static tree
9541 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9542 tree args ATTRIBUTE_UNUSED,
9543 int flags ATTRIBUTE_UNUSED,
9544 bool * no_add_attrs)
9546 if (!TARGET_SH2A)
9548 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9549 name);
9550 *no_add_attrs = true;
9552 else if (TREE_CODE (*node) != FUNCTION_DECL)
9554 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9555 name);
9556 *no_add_attrs = true;
9558 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9560 /* The argument must be a constant integer. */
9561 warning (OPT_Wattributes,
9562 "%qE attribute argument not an integer constant",
9563 name);
9564 *no_add_attrs = true;
9566 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9568 /* The argument value must be between 0 and 255. */
9569 warning (OPT_Wattributes,
9570 "%qE attribute argument should be between 0 and 255",
9571 name);
9572 *no_add_attrs = true;
9574 return NULL_TREE;
9577 /* Returns true if the given rtx X references a function that has been
9578 assigned the attribute 'function_vector'. */
9579 bool
9580 sh2a_is_function_vector_call (rtx x)
9582 if (GET_CODE (x) == SYMBOL_REF
9583 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9585 tree tr = SYMBOL_REF_DECL (x);
9587 if (sh2a_function_vector_p (tr))
9588 return true;
9591 return false;
9594 /* Returns the function vector number, if the attribute
9595 'function_vector' is assigned, otherwise returns zero. */
9597 sh2a_get_function_vector_number (rtx x)
9599 int num;
9600 tree list, t;
9602 if ((GET_CODE (x) == SYMBOL_REF)
9603 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9605 t = SYMBOL_REF_DECL (x);
9607 if (TREE_CODE (t) != FUNCTION_DECL)
9608 return 0;
9610 list = SH_ATTRIBUTES (t);
9611 while (list)
9613 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9615 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9616 return num;
9619 list = TREE_CHAIN (list);
9622 return 0;
9624 else
9625 return 0;
9628 /* Handle an "sp_switch" attribute; arguments as in
9629 struct attribute_spec.handler. */
9630 static tree
9631 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9632 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9634 if (TREE_CODE (*node) != FUNCTION_DECL)
9636 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9637 name);
9638 *no_add_attrs = true;
9640 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9642 /* The argument must be a constant string. */
9643 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9644 name);
9645 *no_add_attrs = true;
9648 return NULL_TREE;
9651 /* Handle a "trap_exit" attribute; arguments as in
9652 struct attribute_spec.handler. */
9653 static tree
9654 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9655 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9657 if (TREE_CODE (*node) != FUNCTION_DECL)
9659 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9660 name);
9661 *no_add_attrs = true;
9663 /* The argument specifies a trap number to be used in a trapa instruction
9664 at function exit (instead of an rte instruction). */
9665 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9667 /* The argument must be a constant integer. */
9668 warning (OPT_Wattributes, "%qE attribute argument not an "
9669 "integer constant", name);
9670 *no_add_attrs = true;
9673 return NULL_TREE;
9676 static tree
9677 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9678 tree name ATTRIBUTE_UNUSED,
9679 tree args ATTRIBUTE_UNUSED,
9680 int flags ATTRIBUTE_UNUSED,
9681 bool *no_add_attrs ATTRIBUTE_UNUSED)
9683 return NULL_TREE;
9686 /* True if __attribute__((renesas)) or -mrenesas. */
9687 bool
9688 sh_attr_renesas_p (const_tree td)
9690 if (TARGET_HITACHI)
9691 return true;
9692 if (td == NULL_TREE)
9693 return false;
9694 if (DECL_P (td))
9695 td = TREE_TYPE (td);
9696 if (td == error_mark_node)
9697 return false;
9698 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9699 != NULL_TREE);
9702 /* True if __attribute__((renesas)) or -mrenesas, for the current
9703 function. */
9704 bool
9705 sh_cfun_attr_renesas_p (void)
9707 return sh_attr_renesas_p (current_function_decl);
9710 bool
9711 sh_cfun_interrupt_handler_p (void)
9713 return (lookup_attribute ("interrupt_handler",
9714 DECL_ATTRIBUTES (current_function_decl))
9715 != NULL_TREE);
9718 /* Returns true if FUNC has been assigned the attribute
9719 "function_vector". */
9720 bool
9721 sh2a_function_vector_p (tree func)
9723 tree list;
9724 if (TREE_CODE (func) != FUNCTION_DECL)
9725 return false;
9727 list = SH_ATTRIBUTES (func);
9728 while (list)
9730 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9731 return true;
9733 list = TREE_CHAIN (list);
9735 return false;
9738 /* Returns TRUE if the current function has the "resbank" attribute. */
9740 bool
9741 sh_cfun_resbank_handler_p (void)
9743 return ((lookup_attribute ("resbank",
9744 DECL_ATTRIBUTES (current_function_decl))
9745 != NULL_TREE)
9746 && (lookup_attribute ("interrupt_handler",
9747 DECL_ATTRIBUTES (current_function_decl))
9748 != NULL_TREE) && TARGET_SH2A);
9751 /* Returns true if the current function has a "trap_exit" attribute set. */
9753 bool
9754 sh_cfun_trap_exit_p (void)
9756 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9757 != NULL_TREE;
9760 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9762 static const char *
9763 sh_check_pch_target_flags (int old_flags)
9765 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9766 | MASK_SH_E | MASK_HARD_SH4
9767 | MASK_FPU_SINGLE | MASK_SH4))
9768 return _("created and used with different architectures / ABIs");
9769 if ((old_flags ^ target_flags) & MASK_HITACHI)
9770 return _("created and used with different ABIs");
9771 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9772 return _("created and used with different endianness");
9773 return NULL;
9776 /* Predicates used by the templates. */
9778 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9779 Used only in general_movsrc_operand. */
9781 bool
9782 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9784 switch (REGNO (op))
9786 case PR_REG:
9787 case MACL_REG:
9788 case MACH_REG:
9789 return true;
9791 return false;
9794 /* Returns true if OP is a floating point value with value 0.0. */
9796 bool
9797 fp_zero_operand (rtx op)
9799 REAL_VALUE_TYPE r;
9801 if (GET_MODE (op) != SFmode)
9802 return false;
9804 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9805 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9808 /* Returns true if OP is a floating point value with value 1.0. */
9810 bool
9811 fp_one_operand (rtx op)
9813 REAL_VALUE_TYPE r;
9815 if (GET_MODE (op) != SFmode)
9816 return false;
9818 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9819 return REAL_VALUES_EQUAL (r, dconst1);
9822 /* In general mode switching is used. If we are
9823 compiling without -mfmovd, movsf_ie isn't taken into account for
9824 mode switching. We could check in machine_dependent_reorg for
9825 cases where we know we are in single precision mode, but there is
9826 no interface to find that out during reload, so we must avoid
9827 choosing an fldi alternative during reload and thus failing to
9828 allocate a scratch register for the constant loading. */
9829 bool
9830 fldi_ok (void)
9832 return true;
9835 /* Return the TLS type for TLS symbols, 0 otherwise. */
9836 enum tls_model
9837 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9839 if (GET_CODE (op) != SYMBOL_REF)
9840 return TLS_MODEL_NONE;
9841 return SYMBOL_REF_TLS_MODEL (op);
9844 /* Return the destination address of a branch. */
9846 static int
9847 branch_dest (rtx branch)
9849 rtx dest = SET_SRC (PATTERN (branch));
9850 int dest_uid;
9852 if (GET_CODE (dest) == IF_THEN_ELSE)
9853 dest = XEXP (dest, 1);
9854 dest = XEXP (dest, 0);
9855 dest_uid = INSN_UID (dest);
9856 return INSN_ADDRESSES (dest_uid);
9859 /* Return nonzero if REG is not used after INSN.
9860 We assume REG is a reload reg, and therefore does
9861 not live past labels. It may live past calls or jumps though. */
9862 bool
9863 reg_unused_after (rtx reg, rtx insn)
9865 enum rtx_code code;
9866 rtx set;
9868 /* If the reg is set by this instruction, then it is safe for our
9869 case. Disregard the case where this is a store to memory, since
9870 we are checking a register used in the store address. */
9871 set = single_set (insn);
9872 if (set && !MEM_P (SET_DEST (set))
9873 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9874 return true;
9876 while ((insn = NEXT_INSN (insn)))
9878 rtx set;
9879 if (!INSN_P (insn))
9880 continue;
9882 code = GET_CODE (insn);
9884 #if 0
9885 /* If this is a label that existed before reload, then the register
9886 is dead here. However, if this is a label added by reorg, then
9887 the register may still be live here. We can't tell the difference,
9888 so we just ignore labels completely. */
9889 if (code == CODE_LABEL)
9890 return 1;
9891 /* else */
9892 #endif
9894 if (code == JUMP_INSN)
9895 return false;
9897 /* If this is a sequence, we must handle them all at once.
9898 We could have for instance a call that sets the target register,
9899 and an insn in a delay slot that uses the register. In this case,
9900 we must return 0. */
9901 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9903 int i;
9904 int retval = 0;
9906 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9908 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9909 rtx set = single_set (this_insn);
9911 if (CALL_P (this_insn))
9912 code = CALL_INSN;
9913 else if (JUMP_P (this_insn))
9915 if (INSN_ANNULLED_BRANCH_P (this_insn))
9916 return false;
9917 code = JUMP_INSN;
9920 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9921 return false;
9922 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9924 if (!MEM_P (SET_DEST (set)))
9925 retval = true;
9926 else
9927 return false;
9929 if (set == NULL_RTX
9930 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9931 return false;
9933 if (retval == 1)
9934 return true;
9935 else if (code == JUMP_INSN)
9936 return false;
9939 set = single_set (insn);
9940 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9941 return false;
9942 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9943 return !MEM_P (SET_DEST (set));
9944 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9945 return false;
9947 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9948 return true;
9950 return true;
9953 #include "ggc.h"
9955 static GTY(()) rtx t_reg_rtx;
9957 get_t_reg_rtx (void)
9959 if (! t_reg_rtx)
9960 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
9961 return t_reg_rtx;
9964 static GTY(()) rtx fpscr_rtx;
9966 get_fpscr_rtx (void)
9968 if (! fpscr_rtx)
9970 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9971 REG_USERVAR_P (fpscr_rtx) = 1;
9972 mark_user_reg (fpscr_rtx);
9974 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9975 mark_user_reg (fpscr_rtx);
9976 return fpscr_rtx;
9979 static GTY(()) tree fpscr_values;
9981 static void
9982 emit_fpu_switch (rtx scratch, int index)
9984 rtx dst, src;
9986 if (fpscr_values == NULL)
9988 tree t;
9990 t = build_index_type (integer_one_node);
9991 t = build_array_type (integer_type_node, t);
9992 t = build_decl (BUILTINS_LOCATION,
9993 VAR_DECL, get_identifier ("__fpscr_values"), t);
9994 DECL_ARTIFICIAL (t) = 1;
9995 DECL_IGNORED_P (t) = 1;
9996 DECL_EXTERNAL (t) = 1;
9997 TREE_STATIC (t) = 1;
9998 TREE_PUBLIC (t) = 1;
9999 TREE_USED (t) = 1;
10001 fpscr_values = t;
10004 src = DECL_RTL (fpscr_values);
10005 if (!can_create_pseudo_p ())
10007 emit_move_insn (scratch, XEXP (src, 0));
10008 if (index != 0)
10009 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10010 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
10012 else
10013 src = adjust_address (src, PSImode, index * 4);
10015 dst = get_fpscr_rtx ();
10016 emit_move_insn (dst, src);
10019 void
10020 emit_sf_insn (rtx pat)
10022 emit_insn (pat);
10025 void
10026 emit_df_insn (rtx pat)
10028 emit_insn (pat);
10031 void
10032 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10034 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10037 void
10038 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10040 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
10041 get_fpscr_rtx ()));
10044 void
10045 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10047 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10050 void
10051 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10053 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
10054 get_fpscr_rtx ()));
10057 static rtx get_free_reg (HARD_REG_SET);
10059 /* This function returns a register to use to load the address to load
10060 the fpscr from. Currently it always returns r1 or r7, but when we are
10061 able to use pseudo registers after combine, or have a better mechanism
10062 for choosing a register, it should be done here. */
10063 /* REGS_LIVE is the liveness information for the point for which we
10064 need this allocation. In some bare-bones exit blocks, r1 is live at the
10065 start. We can even have all of r0..r3 being live:
10066 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10067 The insn before which the new insns are placed will clobber the register
10068 we return. If a basic block consists only of setting the return value
10069 register to a pseudo and using that register, the return value is not
10070 live before or after this block, yet we'll insert our insns right in
10071 the middle. */
10073 static rtx
10074 get_free_reg (HARD_REG_SET regs_live)
10076 if (! TEST_HARD_REG_BIT (regs_live, 1))
10077 return gen_rtx_REG (Pmode, 1);
10079 /* Hard reg 1 is live; since this is a small register classes target,
10080 there shouldn't be anything but a jump before the function end. */
10081 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10082 return gen_rtx_REG (Pmode, 7);
10085 /* This function will set the fpscr from memory.
10086 MODE is the mode we are setting it to. */
10087 void
10088 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10090 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10091 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10092 rtx addr_reg;
10094 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10095 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10098 /* Is the given character a logical line separator for the assembler? */
10099 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10100 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10101 #endif
10103 static bool
10104 sequence_insn_p (rtx insn)
10106 rtx prev, next;
10108 prev = PREV_INSN (insn);
10109 if (prev == NULL)
10110 return false;
10112 next = NEXT_INSN (prev);
10113 if (next == NULL)
10114 return false;
10116 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10120 sh_insn_length_adjustment (rtx insn)
10122 /* Instructions with unfilled delay slots take up an extra two bytes for
10123 the nop in the delay slot. */
10124 if (((NONJUMP_INSN_P (insn)
10125 && GET_CODE (PATTERN (insn)) != USE
10126 && GET_CODE (PATTERN (insn)) != CLOBBER)
10127 || CALL_P (insn)
10128 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
10129 && ! sequence_insn_p (insn)
10130 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10131 return 2;
10133 /* SH2e has a bug that prevents the use of annulled branches, so if
10134 the delay slot is not filled, we'll have to put a NOP in it. */
10135 if (sh_cpu_attr == CPU_SH2E
10136 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
10137 && get_attr_type (insn) == TYPE_CBRANCH
10138 && ! sequence_insn_p (insn))
10139 return 2;
10141 /* sh-dsp parallel processing insns take four bytes instead of two. */
10143 if (NONJUMP_INSN_P (insn))
10145 int sum = 0;
10146 rtx body = PATTERN (insn);
10147 const char *templ;
10148 char c;
10149 bool maybe_label = true;
10151 if (GET_CODE (body) == ASM_INPUT)
10152 templ = XSTR (body, 0);
10153 else if (asm_noperands (body) >= 0)
10154 templ
10155 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10156 else
10157 return 0;
10160 int ppi_adjust = 0;
10163 c = *templ++;
10164 while (c == ' ' || c == '\t');
10165 /* all sh-dsp parallel-processing insns start with p.
10166 The only non-ppi sh insn starting with p is pref.
10167 The only ppi starting with pr is prnd. */
10168 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10169 ppi_adjust = 2;
10170 /* The repeat pseudo-insn expands to three insns, a total of
10171 six bytes in size. */
10172 else if ((c == 'r' || c == 'R')
10173 && ! strncasecmp ("epeat", templ, 5))
10174 ppi_adjust = 4;
10175 while (c && c != '\n'
10176 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10178 /* If this is a label, it is obviously not a ppi insn. */
10179 if (c == ':' && maybe_label)
10181 ppi_adjust = 0;
10182 break;
10184 else if (c == '\'' || c == '"')
10185 maybe_label = false;
10186 c = *templ++;
10188 sum += ppi_adjust;
10189 maybe_label = c != ':';
10191 while (c);
10192 return sum;
10194 return 0;
10197 /* Return TRUE for a valid displacement for the REG+disp addressing
10198 with MODE. */
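/* E.g. for a non-SH2A target (illustrative): SImode allows displacements
   0, 4, ..., 60, HImode 0, 2, ..., 30 and QImode 0..15.  */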
10200 bool
10201 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10202 bool allow_zero)
10204 if (! CONST_INT_P (op))
10205 return false;
10207 if (TARGET_SHMEDIA)
10209 int size;
10211 /* Check if this is the address of an unaligned load / store. */
10212 if (mode == VOIDmode)
10213 return satisfies_constraint_I06 (op);
10215 size = GET_MODE_SIZE (mode);
10216 return (!(INTVAL (op) & (size - 1))
10217 && INTVAL (op) >= -512 * size
10218 && INTVAL (op) < 512 * size);
10220 else
10222 const HOST_WIDE_INT offset = INTVAL (op);
10223 const int max_disp = max_mov_insn_displacement (mode, consider_sh2a);
10224 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10226 /* If the mode does not support any displacement, always return false.
10227 Even though an index of '0' is actually always valid, it will cause
10228 trouble when e.g. a DFmode move is split into two SFmode moves,
10229 where one SFmode move will have index '0' and the other move will
10230 have index '4'. */
10231 if (!allow_zero && max_disp < 1)
10232 return false;
10234 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10238 /* Recognize an RTL expression that is a valid memory address for
10239 an instruction.
10240 The MODE argument is the machine mode for the MEM expression
10241 that wants to use this address.
10242 Allow REG
10243 REG+disp
10244 REG+r0
10245 REG++
10246 --REG
10248 GBR+disp */
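/* In SH assembler syntax these forms correspond to operands such as
   @Rn, @(disp,Rn), @(R0,Rn), @Rn+, @-Rn and @(disp,GBR) respectively
   (illustrative).  */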
10250 static bool
10251 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10253 if (REG_P (x) && REGNO (x) == GBR_REG)
10254 return true;
10256 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10257 return true;
10258 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10259 && ! TARGET_SHMEDIA
10260 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10261 return true;
10262 else if (GET_CODE (x) == PLUS
10263 && (mode != PSImode || reload_completed))
10265 rtx xop0 = XEXP (x, 0);
10266 rtx xop1 = XEXP (x, 1);
10268 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10269 return gbr_displacement (xop1, mode);
10271 if (GET_MODE_SIZE (mode) <= 8
10272 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10273 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10274 return true;
10276 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10277 || ((xop0 == stack_pointer_rtx
10278 || xop0 == hard_frame_pointer_rtx)
10279 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10280 || ((xop1 == stack_pointer_rtx
10281 || xop1 == hard_frame_pointer_rtx)
10282 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10283 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10284 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10285 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10286 && TARGET_FMOVD && mode == DFmode)))
10288 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10289 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10290 return true;
10291 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10292 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10293 return true;
10297 return false;
10300 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10301 isn't protected by a PIC unspec. */
10302 bool
10303 nonpic_symbol_mentioned_p (rtx x)
10305 const char *fmt;
10306 int i;
10308 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10309 || GET_CODE (x) == PC)
10310 return true;
10312 /* We don't want to look into the possible MEM location of a
10313 CONST_DOUBLE, since we're not going to use it, in general. */
10314 if (GET_CODE (x) == CONST_DOUBLE)
10315 return false;
10317 if (GET_CODE (x) == UNSPEC
10318 && (XINT (x, 1) == UNSPEC_PIC
10319 || XINT (x, 1) == UNSPEC_GOT
10320 || XINT (x, 1) == UNSPEC_GOTOFF
10321 || XINT (x, 1) == UNSPEC_GOTPLT
10322 || XINT (x, 1) == UNSPEC_GOTTPOFF
10323 || XINT (x, 1) == UNSPEC_DTPOFF
10324 || XINT (x, 1) == UNSPEC_TPOFF
10325 || XINT (x, 1) == UNSPEC_PLT
10326 || XINT (x, 1) == UNSPEC_SYMOFF
10327 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10328 return false;
10330 fmt = GET_RTX_FORMAT (GET_CODE (x));
10331 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10333 if (fmt[i] == 'E')
10335 int j;
10336 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10337 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10338 return true;
10340 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10341 return true;
10344 return false;
10347 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10348 @GOTOFF in `reg'. */
10350 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10351 rtx reg)
10353 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10354 return orig;
10356 if (GET_CODE (orig) == LABEL_REF
10357 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10359 if (reg == NULL_RTX)
10360 reg = gen_reg_rtx (Pmode);
10362 emit_insn (gen_symGOTOFF2reg (reg, orig));
10363 return reg;
10365 else if (GET_CODE (orig) == SYMBOL_REF)
10367 if (reg == NULL_RTX)
10368 reg = gen_reg_rtx (Pmode);
10370 emit_insn (gen_symGOT2reg (reg, orig));
10371 return reg;
10373 return orig;
10376 /* Given a (logical) mode size and an offset in bytes, try to find the
10377 appropriate displacement value for a mov insn. On SH the displacements
10378 are limited to max. 60 bytes for SImode, max. 30 bytes for HImode and
10379 max. 15 bytes for QImode. To compensate for this we create a new base
10380 address by adding an adjustment value to it.
10382 If the originally requested offset is greater than 127 we prefer using
10383 values 124..127 over 128..131 to increase opportunities to use the
10384 add #imm, Rn insn.
10386 In some cases it is possible that a requested offset might seem unaligned
10387 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10388 This is compensated by adjusting the base address so that the effective
10389 address of the displacement move insn will be aligned.
10391 This is not the best possible way of rebasing the base address, as it
10392 does not look at other present displacement addressings around it.
10393 In some cases this can create more base address adjustments than would
10394 actually be necessary. */
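/* A worked example (illustrative): for an SImode access at offset 132,
   the maximum mov.l displacement is 60, so the base is adjusted by 124
   (which still fits an add #imm insn) and the remaining displacement of
   8 is used in the move, i.e. offset_adjust = 124 and mov_disp = 8.  */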
10396 struct disp_adjust
10398 rtx offset_adjust;
10399 rtx mov_disp;
10402 static struct disp_adjust
10403 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10405 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10407 /* Do not try to use SH2A's large displacements here, because this would
10408 effectively disable the small displacement insns. */
10409 const int mode_sz = GET_MODE_SIZE (mode);
10410 const int mov_insn_sz = mov_insn_size (mode, false);
10411 const int max_disp = max_mov_insn_displacement (mode, false);
10412 const int max_disp_next = max_disp + mov_insn_sz;
10413 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10414 HOST_WIDE_INT offset_adjust;
10416 /* In some cases this actually does happen and we must check for it. */
10417 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10418 return res;
10420 /* Keeps the previous behavior for QImode displacement addressing.
10421 This just decides how the offset is re-based. Removing this special
10422 case will result in slightly bigger code on average, but it's not that
10423 bad actually. */
10424 if (mov_insn_sz == 1)
10425 align_modifier = 0;
10427 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10429 if (mode_sz + offset - offset_adjust <= max_disp_next)
10431 res.offset_adjust = GEN_INT (offset_adjust);
10432 res.mov_disp = GEN_INT (offset - offset_adjust);
10435 return res;
10438 /* Try to modify an illegitimate address and make it legitimate.
10439 If we find one, return the new, valid address.
10440 Otherwise, return the original address. */
10442 static rtx
10443 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10445 if (flag_pic)
10446 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10448 if (TARGET_SHMEDIA)
10449 return x;
10451 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10452 || (TARGET_SH2E && mode == SFmode))
10453 return x;
10455 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10456 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10458 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10459 INTVAL (XEXP (x, 1)));
10461 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10463 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10464 adj.offset_adjust, NULL_RTX, 0,
10465 OPTAB_LIB_WIDEN);
10466 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10470 return x;
10473 /* Attempt to replace *p, which is an address that needs reloading, with
10474 a valid memory address for an operand of mode MODE.
10475 Like for sh_legitimize_address, for the SH we try to get a normal form
10476 of the address. That will allow inheritance of the address reloads. */
10478 bool
10479 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10480 int itype)
10482 enum reload_type type = (enum reload_type) itype;
10483 const int mode_sz = GET_MODE_SIZE (mode);
10485 if (TARGET_SHMEDIA)
10486 return false;
10488 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10489 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10490 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10491 && (ALLOW_INDEXED_ADDRESS
10492 || XEXP (*p, 0) == stack_pointer_rtx
10493 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10495 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10496 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10498 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10500 push_reload (*p, NULL_RTX, p, NULL,
10501 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10502 return true;
10505 if (TARGET_SH2E && mode == SFmode)
10507 *p = copy_rtx (*p);
10508 push_reload (*p, NULL_RTX, p, NULL,
10509 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10510 return true;
10513 /* FIXME: Do not allow legitimizing of QImode and HImode displacement
10514 moves because then reload has a problem figuring the constraint
10515 that the move insn target/source reg must be R0.
10516 Or maybe some handling is wrong in sh_secondary_reload for this
10517 to work properly? */
10518 if ((mode_sz == 4 || mode_sz == 8)
10519 && ! (TARGET_SH4 && mode == DFmode)
10520 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10522 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10523 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10524 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10525 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10526 return true;
10530 /* We must re-recognize what we created before. */
10531 if (GET_CODE (*p) == PLUS
10532 && (mode_sz == 4 || mode_sz == 8)
10533 && GET_CODE (XEXP (*p, 0)) == PLUS
10534 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10535 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10536 && CONST_INT_P (XEXP (*p, 1))
10537 && ! (TARGET_SH2E && mode == SFmode))
10539 /* Because this address is so complex, we know it must have
10540 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10541 it is already unshared, and needs no further unsharing. */
10542 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10543 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10544 return true;
10547 return false;
10550 /* In the name of slightly smaller debug output, and to cater to
10551 general assembler lossage, recognize various UNSPEC sequences
10552 and turn them back into a direct symbol reference. */
10554 static rtx
10555 sh_delegitimize_address (rtx orig_x)
10557 rtx x, y;
10559 orig_x = delegitimize_mem_from_attrs (orig_x);
10561 x = orig_x;
10562 if (MEM_P (x))
10563 x = XEXP (x, 0);
10564 if (GET_CODE (x) == CONST)
10566 y = XEXP (x, 0);
10567 if (GET_CODE (y) == UNSPEC)
10569 if (XINT (y, 1) == UNSPEC_GOT
10570 || XINT (y, 1) == UNSPEC_GOTOFF
10571 || XINT (y, 1) == UNSPEC_SYMOFF)
10572 return XVECEXP (y, 0, 0);
10573 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10575 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10577 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10579 if (GET_CODE (symplt) == UNSPEC
10580 && XINT (symplt, 1) == UNSPEC_PLT)
10581 return XVECEXP (symplt, 0, 0);
10584 else if (TARGET_SHMEDIA
10585 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10586 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10588 rtx offset = XVECEXP (y, 0, 1);
10590 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10591 if (MEM_P (orig_x))
10592 x = replace_equiv_address_nv (orig_x, x);
10593 return x;
10598 return orig_x;
10601 /* Mark the use of a constant in the literal table. If the constant
10602 has multiple labels, make it unique. */
10603 static rtx
10604 mark_constant_pool_use (rtx x)
10606 rtx insn, lab, pattern;
10608 if (x == NULL_RTX)
10609 return x;
10611 switch (GET_CODE (x))
10613 case LABEL_REF:
10614 x = XEXP (x, 0);
10615 case CODE_LABEL:
10616 break;
10617 default:
10618 return x;
10621 /* Get the first label in the list of labels for the same constant
10622 and delete the other labels in the list. */
10623 lab = x;
10624 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10626 if (!LABEL_P (insn)
10627 || LABEL_REFS (insn) != NEXT_INSN (insn))
10628 break;
10629 lab = insn;
10632 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10633 INSN_DELETED_P (insn) = 1;
10635 /* Mark constants in a window. */
10636 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10638 if (!NONJUMP_INSN_P (insn))
10639 continue;
10641 pattern = PATTERN (insn);
10642 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10643 continue;
10645 switch (XINT (pattern, 1))
10647 case UNSPECV_CONST2:
10648 case UNSPECV_CONST4:
10649 case UNSPECV_CONST8:
10650 XVECEXP (pattern, 0, 1) = const1_rtx;
10651 break;
10652 case UNSPECV_WINDOW_END:
10653 if (XVECEXP (pattern, 0, 0) == x)
10654 return lab;
10655 break;
10656 case UNSPECV_CONST_END:
10657 return lab;
10658 default:
10659 break;
10663 return lab;
10666 /* Return true if it's possible to redirect BRANCH1 to the destination
10667 of an unconditional jump BRANCH2. We only want to do this if the
10668 resulting branch will have a short displacement. */
10669 bool
10670 sh_can_redirect_branch (rtx branch1, rtx branch2)
10672 if (flag_expensive_optimizations && simplejump_p (branch2))
10674 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10675 rtx insn;
10676 int distance;
10678 for (distance = 0, insn = NEXT_INSN (branch1);
10679 insn && distance < 256;
10680 insn = PREV_INSN (insn))
10682 if (insn == dest)
10683 return true;
10684 else
10685 distance += get_attr_length (insn);
10687 for (distance = 0, insn = NEXT_INSN (branch1);
10688 insn && distance < 256;
10689 insn = NEXT_INSN (insn))
10691 if (insn == dest)
10692 return true;
10693 else
10694 distance += get_attr_length (insn);
10697 return false;
10700 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10701 bool
10702 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10703 unsigned int new_reg)
10705 /* Interrupt functions can only use registers that have already been
10706 saved by the prologue, even if they would normally be
10707 call-clobbered. */
10709 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10710 return false;
10712 return true;
10715 /* Function to update the integer COST
10716 based on the relationship between INSN that is dependent on
10717 DEP_INSN through the dependence LINK. The default is to make no
10718 adjustment to COST. This can be used for example to specify to
10719 the scheduler that an output- or anti-dependence does not incur
10720 the same cost as a data-dependence. The return value should be
10721 the new value for COST. */
10722 static int
10723 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10725 rtx reg, use_pat;
10727 if (TARGET_SHMEDIA)
10729 /* On SHmedia, if the dependence is an anti-dependence or
10730 output-dependence, there is no cost. */
10731 if (REG_NOTE_KIND (link) != 0)
10733 /* However, dependencies between target register loads and
10734 uses of the register in a subsequent block that are separated
10735 by a conditional branch are not modelled - we have to make do with
10736 the anti-dependency between the target register load and the
10737 conditional branch that ends the current block. */
10738 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10739 && GET_CODE (PATTERN (dep_insn)) == SET
10740 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10741 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10742 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10744 int orig_cost = cost;
10745 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10746 rtx target = ((! note
10747 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10748 ? insn : JUMP_LABEL (insn));
10749 /* On the likely path, the branch costs 1, on the unlikely path,
10750 it costs 3. */
10751 cost--;
10753 target = next_active_insn (target);
10754 while (target && ! flow_dependent_p (target, dep_insn)
10755 && --cost > 0);
10756 /* If two branches are executed in immediate succession, with the
10757 first branch properly predicted, this causes a stall at the
10758 second branch, hence we won't need the target for the
10759 second branch for two cycles after the launch of the first
10760 branch. */
10761 if (cost > orig_cost - 2)
10762 cost = orig_cost - 2;
10764 else
10765 cost = 0;
10768 else if (get_attr_is_mac_media (insn)
10769 && get_attr_is_mac_media (dep_insn))
10770 cost = 1;
10772 else if (! reload_completed
10773 && GET_CODE (PATTERN (insn)) == SET
10774 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10775 && GET_CODE (PATTERN (dep_insn)) == SET
10776 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10777 && cost < 4)
10778 cost = 4;
10779 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10780 that is needed at the target. */
10781 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10782 && ! flow_dependent_p (insn, dep_insn))
10783 cost--;
10785 else if (REG_NOTE_KIND (link) == 0)
10787 enum attr_type type;
10788 rtx dep_set;
10790 if (recog_memoized (insn) < 0
10791 || recog_memoized (dep_insn) < 0)
10792 return cost;
10794 dep_set = single_set (dep_insn);
10796 /* The latency that we specify in the scheduling description refers
10797 to the actual output, not to an auto-increment register; for that,
10798 the latency is one. */
10799 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10801 rtx set = single_set (insn);
10803 if (set
10804 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10805 && (!MEM_P (SET_DEST (set))
10806 || !reg_mentioned_p (SET_DEST (dep_set),
10807 XEXP (SET_DEST (set), 0))))
10808 cost = 1;
10810 /* The only input for a call that is timing-critical is the
10811 function's address. */
10812 if (CALL_P (insn))
10814 rtx call = get_call_rtx_from (insn);
10815 if (call
10816 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10817 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10818 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10819 cost -= TARGET_SH4_300 ? 3 : 6;
10821 /* Likewise, the most timing-critical input for an sfunc call
10822 is the function address. However, sfuncs typically start
10823 using their arguments pretty quickly.
10824 Assume a four cycle delay for SH4 before they are needed.
10825 Cached ST40-300 calls are quicker, so assume only a one
10826 cycle delay there.
10827 ??? Maybe we should encode the delays till input registers
10828 are needed by sfuncs into the sfunc call insn. */
10829 /* All sfunc calls are parallels with at least four components.
10830 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10831 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10832 && XVECLEN (PATTERN (insn), 0) >= 4
10833 && (reg = sfunc_uses_reg (insn)))
10835 if (! reg_set_p (reg, dep_insn))
10836 cost -= TARGET_SH4_300 ? 1 : 4;
10838 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10840 enum attr_type dep_type = get_attr_type (dep_insn);
10842 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10843 cost--;
10844 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10845 && (type = get_attr_type (insn)) != TYPE_CALL
10846 && type != TYPE_SFUNC)
10847 cost--;
10848 /* When the preceding instruction loads the shift amount of
10849 the following SHAD/SHLD, the latency of the load is increased
10850 by 1 cycle. */
10851 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10852 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10853 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10854 XEXP (SET_SRC (single_set (insn)),
10855 1)))
10856 cost++;
10857 /* When an LS group instruction with a latency of less than
10858 3 cycles is followed by a double-precision floating-point
10859 instruction, FIPR, or FTRV, the latency of the first
10860 instruction is increased to 3 cycles. */
10861 else if (cost < 3
10862 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10863 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10864 cost = 3;
10865 /* The lsw register of a double-precision computation is ready one
10866 cycle earlier. */
10867 else if (reload_completed
10868 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10869 && (use_pat = single_set (insn))
10870 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10871 SET_SRC (use_pat)))
10872 cost -= 1;
10874 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10875 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10876 cost -= 1;
10878 else if (TARGET_SH4_300)
10880 /* Stores need their input register two cycles later. */
10881 if (dep_set && cost >= 1
10882 && ((type = get_attr_type (insn)) == TYPE_STORE
10883 || type == TYPE_PSTORE
10884 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10886 rtx set = single_set (insn);
10888 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10889 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10891 cost -= 2;
10892 /* But don't reduce the cost below 1 if the address depends
10893 on a side effect of dep_insn. */
10894 if (cost < 1
10895 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10896 cost = 1;
10901 /* An anti-dependence penalty of two applies if the first insn is a double
10902 precision fadd / fsub / fmul. */
10903 else if (!TARGET_SH4_300
10904 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10905 && recog_memoized (dep_insn) >= 0
10906 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10907 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10908 /* A lot of alleged anti-flow dependences are fake,
10909 so check this one is real. */
10910 && flow_dependent_p (dep_insn, insn))
10911 cost = 2;
10913 return cost;
10916 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10917 if DEP_INSN is anti-flow dependent on INSN. */
10918 static bool
10919 flow_dependent_p (rtx insn, rtx dep_insn)
10921 rtx tmp = PATTERN (insn);
10923 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10924 return tmp == NULL_RTX;
10927 /* A helper function for flow_dependent_p called through note_stores. */
10928 static void
10929 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10931 rtx * pinsn = (rtx *) data;
10933 if (*pinsn && reg_referenced_p (x, *pinsn))
10934 *pinsn = NULL_RTX;
10937 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10938 'special function' patterns (type sfunc) that clobber pr, but that
10939 do not look like function calls to leaf_function_p. Hence we must
10940 do this extra check. */
10941 static int
10942 sh_pr_n_sets (void)
10944 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10947 /* Return where to allocate pseudo for a given hard register initial
10948 value. */
10949 static rtx
10950 sh_allocate_initial_value (rtx hard_reg)
10952 rtx x;
10954 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10956 if (crtl->is_leaf
10957 && ! sh_pr_n_sets ()
10958 && ! (TARGET_SHCOMPACT
10959 && ((crtl->args.info.call_cookie
10960 & ~ CALL_COOKIE_RET_TRAMP (1))
10961 || crtl->saves_all_registers)))
10962 x = hard_reg;
10963 else
10964 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10966 else
10967 x = NULL_RTX;
10969 return x;
10972 /* This function returns "2" to indicate dual issue for the SH4
10973 processor. To be used by the DFA pipeline description. */
10974 static int
10975 sh_issue_rate (void)
10977 if (TARGET_SUPERSCALAR)
10978 return 2;
10979 else
10980 return 1;
10983 /* Functions for ready queue reordering for sched1. */
10986 /* Get the weight for mode MODE of a set X. */
10986 static short
10987 find_set_regmode_weight (rtx x, enum machine_mode mode)
10989 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10990 return 1;
10991 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10993 if (REG_P (SET_DEST (x)))
10995 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10996 return 1;
10997 else
10998 return 0;
11000 return 1;
11002 return 0;
11005 /* Get regmode weight for insn. */
11006 static short
11007 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
11009 short reg_weight = 0;
11010 rtx x;
11012 /* Increment weight for each register born here. */
11013 x = PATTERN (insn);
11014 reg_weight += find_set_regmode_weight (x, mode);
11015 if (GET_CODE (x) == PARALLEL)
11017 int j;
11018 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11020 x = XVECEXP (PATTERN (insn), 0, j);
11021 reg_weight += find_set_regmode_weight (x, mode);
11024 /* Decrement weight for each register that dies here. */
11025 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11027 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11029 rtx note = XEXP (x, 0);
11030 if (REG_P (note) && GET_MODE (note) == mode)
11031 reg_weight--;
11034 return reg_weight;
11037 /* Calculate regmode weights for all insns of a basic block. */
11038 static void
11039 find_regmode_weight (basic_block b, enum machine_mode mode)
11041 rtx insn, next_tail, head, tail;
11043 get_ebb_head_tail (b, b, &head, &tail);
11044 next_tail = NEXT_INSN (tail);
11046 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11048 /* Handle register life information. */
11049 if (!INSN_P (insn))
11050 continue;
11052 if (mode == SFmode)
11053 INSN_REGMODE_WEIGHT (insn, mode) =
11054 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
11055 else if (mode == SImode)
11056 INSN_REGMODE_WEIGHT (insn, mode) =
11057 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
11061 /* Comparison function for ready queue sorting. */
11062 static int
11063 rank_for_reorder (const void *x, const void *y)
11065 rtx tmp = *(const rtx *) y;
11066 rtx tmp2 = *(const rtx *) x;
11068 /* The insn in a schedule group should be issued first. */
11069 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11070 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11072 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11073 minimizes instruction movement, thus minimizing sched's effect on
11074 register pressure. */
11075 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11078 /* Resort the array A in which only element at index N may be out of order. */
11079 static void
11080 swap_reorder (rtx *a, int n)
11082 rtx insn = a[n - 1];
11083 int i = n - 2;
11085 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11087 a[i + 1] = a[i];
11088 i -= 1;
11090 a[i + 1] = insn;
11093 /* Sort the ready list by ascending priority. */
11094 static void
11095 ready_reorder (rtx *ready, int nready)
11097 if (nready == 2)
11098 swap_reorder (ready, nready);
11099 else if (nready > 2)
11100 qsort (ready, nready, sizeof (rtx), rank_for_reorder);
11103 /* Count life regions of r0 for a block. */
11104 static int
11105 find_r0_life_regions (basic_block b)
11107 rtx end, insn;
11108 rtx pset;
11109 rtx r0_reg;
11110 int live;
11111 int set;
11112 int death = 0;
11114 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11116 set = 1;
11117 live = 1;
11119 else
11121 set = 0;
11122 live = 0;
11125 insn = BB_HEAD (b);
11126 end = BB_END (b);
11127 r0_reg = gen_rtx_REG (SImode, R0_REG);
11128 while (1)
11130 if (INSN_P (insn))
11132 if (find_regno_note (insn, REG_DEAD, R0_REG))
11134 death++;
11135 live = 0;
11137 if (!live
11138 && (pset = single_set (insn))
11139 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11140 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11142 set++;
11143 live = 1;
11146 if (insn == end)
11147 break;
11148 insn = NEXT_INSN (insn);
11150 return set - death;
11153 /* Calculate regmode weights for all insns of all basic blocks. */
11154 static void
11155 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11156 int verbose ATTRIBUTE_UNUSED,
11157 int old_max_uid)
11159 basic_block b;
11161 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11162 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11163 r0_life_regions = 0;
11165 FOR_EACH_BB_REVERSE (b)
11167 find_regmode_weight (b, SImode);
11168 find_regmode_weight (b, SFmode);
11169 if (!reload_completed)
11170 r0_life_regions += find_r0_life_regions (b);
11173 CURR_REGMODE_PRESSURE (SImode) = 0;
11174 CURR_REGMODE_PRESSURE (SFmode) = 0;
11178 /* Cleanup. */
11179 static void
11180 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11181 int verbose ATTRIBUTE_UNUSED)
11183 if (regmode_weight[0])
11185 free (regmode_weight[0]);
11186 regmode_weight[0] = NULL;
11188 if (regmode_weight[1])
11190 free (regmode_weight[1]);
11191 regmode_weight[1] = NULL;
11195 /* The set of supported scalar modes differs from the default version in
11196 TImode for 32-bit SHMEDIA. */
11197 static bool
11198 sh_scalar_mode_supported_p (enum machine_mode mode)
11200 if (TARGET_SHMEDIA32 && mode == TImode)
11201 return false;
11203 return default_scalar_mode_supported_p (mode);
11206 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11207 keep count of register pressures on SImode and SFmode. */
11208 static int
11209 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11210 int sched_verbose ATTRIBUTE_UNUSED,
11211 rtx insn,
11212 int can_issue_more)
11214 if (GET_CODE (PATTERN (insn)) != USE
11215 && GET_CODE (PATTERN (insn)) != CLOBBER)
11216 cached_can_issue_more = can_issue_more - 1;
11217 else
11218 cached_can_issue_more = can_issue_more;
11220 if (reload_completed)
11221 return cached_can_issue_more;
11223 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11224 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11226 return cached_can_issue_more;
11229 static void
11230 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11231 int verbose ATTRIBUTE_UNUSED,
11232 int veclen ATTRIBUTE_UNUSED)
11234 CURR_REGMODE_PRESSURE (SImode) = 0;
11235 CURR_REGMODE_PRESSURE (SFmode) = 0;
11238 /* Some magic numbers. */
11239 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11240 functions that already have high pressure on r0. */
11241 #define R0_MAX_LIFE_REGIONS 2
11242 /* Register Pressure thresholds for SImode and SFmode registers. */
11243 #define SIMODE_MAX_WEIGHT 5
11244 #define SFMODE_MAX_WEIGHT 10
11246 /* Return true if the pressure is high for MODE. */
11247 static bool
11248 high_pressure (enum machine_mode mode)
11250 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11251 functions that already have high pressure on r0. */
11252 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11253 return true;
11255 if (mode == SFmode)
11256 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11257 else
11258 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11261 /* Reorder ready queue if register pressure is high. */
11262 static int
11263 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11264 int sched_verbose ATTRIBUTE_UNUSED,
11265 rtx *ready,
11266 int *n_readyp,
11267 int clock_var ATTRIBUTE_UNUSED)
11269 if (reload_completed)
11270 return sh_issue_rate ();
11272 if (high_pressure (SFmode) || high_pressure (SImode))
11274 ready_reorder (ready, *n_readyp);
11277 return sh_issue_rate ();
11280 /* Skip cycles if the current register pressure is high. */
11281 static int
11282 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11283 int sched_verbose ATTRIBUTE_UNUSED,
11284 rtx *ready ATTRIBUTE_UNUSED,
11285 int *n_readyp ATTRIBUTE_UNUSED,
11286 int clock_var ATTRIBUTE_UNUSED)
11288 if (reload_completed)
11289 return cached_can_issue_more;
11291 if (high_pressure(SFmode) || high_pressure (SImode))
11292 skip_cycles = 1;
11294 return cached_can_issue_more;
11297 /* Skip cycles without sorting the ready queue. This will move insns from
11298 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11299 queue by sh_reorder. */
11301 /* Generally, skipping this many cycles is sufficient for all insns to move
11302 from Q -> R. */
11303 #define MAX_SKIPS 8
11305 static int
11306 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11307 int sched_verbose ATTRIBUTE_UNUSED,
11308 rtx insn ATTRIBUTE_UNUSED,
11309 int last_clock_var,
11310 int clock_var,
11311 int *sort_p)
11313 if (reload_completed)
11314 return 0;
11316 if (skip_cycles)
11318 if ((clock_var - last_clock_var) < MAX_SKIPS)
11320 *sort_p = 0;
11321 return 1;
11323 /* If this is the last cycle we are skipping, allow reordering of R. */
11324 if ((clock_var - last_clock_var) == MAX_SKIPS)
11326 *sort_p = 1;
11327 return 1;
11331 skip_cycles = 0;
11333 return 0;
11336 /* SHmedia requires registers for branches, so we can't generate new
11337 branches past reload. */
11338 static bool
11339 sh_cannot_modify_jumps_p (void)
11341 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11344 static reg_class_t
11345 sh_target_reg_class (void)
11347 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11350 static bool
11351 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11353 HARD_REG_SET dummy;
11354 #if 0
11355 rtx insn;
11356 #endif
11358 if (! shmedia_space_reserved_for_target_registers)
11359 return 0;
11360 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11361 return 0;
11362 if (calc_live_regs (&dummy) >= 6 * 8)
11363 return 1;
11364 return 0;
11367 static bool
11368 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11370 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11374 On the SH1..SH4, the trampoline looks like
11375 2 0002 D202 mov.l l2,r2
11376 1 0000 D301 mov.l l1,r3
11377 3 0004 422B jmp @r2
11378 4 0006 0009 nop
11379 5 0008 00000000 l1: .long area
11380 6 000c 00000000 l2: .long function
11382 SH5 (compact) uses r1 instead of r3 for the static chain. */
11385 /* Emit RTL insns to initialize the variable parts of a trampoline.
11386 FNADDR is an RTX for the address of the function's pure code.
11387 CXT is an RTX for the static chain value for the function. */
11389 static void
11390 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11392 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11393 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11395 if (TARGET_SHMEDIA64)
11397 rtx tramp_templ;
11398 int fixed_len;
11400 rtx movi1 = GEN_INT (0xcc000010);
11401 rtx shori1 = GEN_INT (0xc8000010);
11402 rtx src, dst;
11404 /* The following trampoline works within a +- 128 KB range for cxt:
11405 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11406 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11407 gettr tr1,r1; blink tr0,r63 */
11408 /* Address rounding makes it hard to compute the exact bounds of the
11409 offset for this trampoline, but we have a rather generous offset
11410 range, so frame_offset should do fine as an upper bound. */
11411 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11413 /* ??? We could optimize this trampoline initialization
11414 by writing DImode words with two insns each. */
11415 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11416 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11417 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11418 insn = gen_rtx_AND (DImode, insn, mask);
11419 /* Or in ptb/u .,tr1 pattern */
11420 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11421 insn = force_operand (insn, NULL_RTX);
11422 insn = gen_lowpart (SImode, insn);
11423 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11424 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11425 insn = gen_rtx_AND (DImode, insn, mask);
11426 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11427 insn = gen_lowpart (SImode, insn);
11428 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11429 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11430 insn = gen_rtx_AND (DImode, insn, mask);
11431 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11432 insn = gen_lowpart (SImode, insn);
11433 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11434 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11435 insn = gen_rtx_AND (DImode, insn, mask);
11436 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11437 insn = gen_lowpart (SImode, insn);
11438 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11439 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11440 insn = gen_rtx_AND (DImode, insn, mask);
11441 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11442 insn = gen_lowpart (SImode, insn);
11443 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11444 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11445 GEN_INT (0x6bf10600));
11446 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11447 GEN_INT (0x4415fc10));
11448 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11449 GEN_INT (0x4401fff0));
11450 emit_insn (gen_ic_invalidate_line (tramp));
11451 return;
11453 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11454 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11456 tramp_templ = gen_datalabel_ref (tramp_templ);
11457 dst = tramp_mem;
11458 src = gen_const_mem (BLKmode, tramp_templ);
11459 set_mem_align (dst, 256);
11460 set_mem_align (src, 64);
11461 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11463 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11464 emit_move_insn (adjust_address (tramp_mem, Pmode,
11465 fixed_len + GET_MODE_SIZE (Pmode)),
11466 cxt);
11467 emit_insn (gen_ic_invalidate_line (tramp));
11468 return;
11470 else if (TARGET_SHMEDIA)
11472 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11473 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11474 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11475 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11476 /* movi 0,r1: 0xcc000010 and shori 0,r1: 0xc8000010 concatenated,
11477 rotated right by 10, with the higher 16 bits of every 32 selected. */
11478 rtx movishori
11479 = force_reg (V2HImode, (simplify_gen_subreg
11480 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11481 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11482 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11484 fnaddr = force_reg (SImode, fnaddr);
11485 cxt = force_reg (SImode, cxt);
11486 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11487 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11488 movishori));
11489 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11490 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11491 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11492 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11493 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11494 gen_rtx_SUBREG (V2HImode, cxt, 0),
11495 movishori));
11496 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11497 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11498 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11499 if (TARGET_LITTLE_ENDIAN)
11501 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11502 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11504 else
11506 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11507 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11509 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11510 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11511 emit_insn (gen_ic_invalidate_line (tramp));
11512 return;
11514 else if (TARGET_SHCOMPACT)
11516 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11517 return;
11519 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11520 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11521 SImode));
11522 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11523 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11524 SImode));
11525 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11526 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11527 if (TARGET_HARVARD)
11529 if (!TARGET_INLINE_IC_INVALIDATE
11530 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11531 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11532 FUNCTION_ORDINARY),
11533 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11534 else
11535 emit_insn (gen_ic_invalidate_line (tramp));
11539 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11541 static rtx
11542 sh_trampoline_adjust_address (rtx tramp)
11544 if (TARGET_SHMEDIA)
11545 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11546 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11547 return tramp;
11550 /* FIXME: This is overly conservative. A SHcompact function that
11551 receives arguments ``by reference'' will have them stored in its
11552 own stack frame, so it must not pass pointers or references to
11553 these arguments to other functions by means of sibling calls. */
11554 /* If PIC, we cannot make sibling calls to global functions
11555 because the PLT requires r12 to be live. */
11556 static bool
11557 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11559 return (1
11560 && (! TARGET_SHCOMPACT
11561 || crtl->args.info.stack_regs == 0)
11562 && ! sh_cfun_interrupt_handler_p ()
11563 && (! flag_pic
11564 || (decl && ! TREE_PUBLIC (decl))
11565 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11568 /* Machine specific built-in functions. */
11570 struct builtin_description
11572 bool (* const is_enabled) (void);
11573 const enum insn_code icode;
11574 const char *const name;
11575 int signature;
11576 tree fndecl;
11579 static bool
11580 shmedia_builtin_p (void)
11582 return TARGET_SHMEDIA;
11585 static bool
11586 sh1_builtin_p (void)
11588 return TARGET_SH1;
11591 /* Describe the number and signedness of arguments; arg[0] == result
11592 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11593 /* 9: 64-bit pointer, 10: 32-bit pointer. */
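/* For example, SH_BLTIN_MAC_HISI below is { 1, 4, 4, 1 }: an unsigned
   result, two "don't care" operands whose types are taken from the insn
   pattern, and a final unsigned operand.  A leading 0, as in the STUA
   entries, means the builtin returns void.  */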
11594 static const char signature_args[][4] =
11596 #define SH_BLTIN_V2SI2 0
11597 { 4, 4 },
11598 #define SH_BLTIN_V4HI2 1
11599 { 4, 4 },
11600 #define SH_BLTIN_V2SI3 2
11601 { 4, 4, 4 },
11602 #define SH_BLTIN_V4HI3 3
11603 { 4, 4, 4 },
11604 #define SH_BLTIN_V8QI3 4
11605 { 4, 4, 4 },
11606 #define SH_BLTIN_MAC_HISI 5
11607 { 1, 4, 4, 1 },
11608 #define SH_BLTIN_SH_HI 6
11609 { 4, 4, 1 },
11610 #define SH_BLTIN_SH_SI 7
11611 { 4, 4, 1 },
11612 #define SH_BLTIN_V4HI2V2SI 8
11613 { 4, 4, 4 },
11614 #define SH_BLTIN_V4HI2V8QI 9
11615 { 4, 4, 4 },
11616 #define SH_BLTIN_SISF 10
11617 { 4, 2 },
11618 #define SH_BLTIN_LDUA_L 11
11619 { 2, 10 },
11620 #define SH_BLTIN_LDUA_Q 12
11621 { 1, 10 },
11622 #define SH_BLTIN_STUA_L 13
11623 { 0, 10, 2 },
11624 #define SH_BLTIN_STUA_Q 14
11625 { 0, 10, 1 },
11626 #define SH_BLTIN_LDUA_L64 15
11627 { 2, 9 },
11628 #define SH_BLTIN_LDUA_Q64 16
11629 { 1, 9 },
11630 #define SH_BLTIN_STUA_L64 17
11631 { 0, 9, 2 },
11632 #define SH_BLTIN_STUA_Q64 18
11633 { 0, 9, 1 },
11634 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11635 #define SH_BLTIN_2 19
11636 #define SH_BLTIN_SU 19
11637 { 1, 2 },
11638 #define SH_BLTIN_3 20
11639 #define SH_BLTIN_SUS 20
11640 { 2, 2, 1 },
11641 #define SH_BLTIN_PSSV 21
11642 { 0, 8, 2, 2 },
11643 #define SH_BLTIN_XXUU 22
11644 #define SH_BLTIN_UUUU 22
11645 { 1, 1, 1, 1 },
11646 #define SH_BLTIN_PV 23
11647 { 0, 8 },
11648 #define SH_BLTIN_VP 24
11649 { 8, 0 },
11651 /* mcmv: operands considered unsigned. */
11652 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11653 /* mperm: control value considered unsigned int. */
11654 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11655 /* mshards_q: returns signed short. */
11656 /* nsb: takes long long arg, returns unsigned char. */
11657 static struct builtin_description bdesc[] =
11659 { shmedia_builtin_p,
11660 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11661 { shmedia_builtin_p,
11662 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11663 { shmedia_builtin_p,
11664 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11665 { shmedia_builtin_p,
11666 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11667 { shmedia_builtin_p,
11668 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11669 { shmedia_builtin_p,
11670 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11671 { shmedia_builtin_p,
11672 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11673 { shmedia_builtin_p,
11674 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11675 { shmedia_builtin_p,
11676 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11677 { shmedia_builtin_p,
11678 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11679 { shmedia_builtin_p,
11680 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11681 { shmedia_builtin_p,
11682 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11683 { shmedia_builtin_p,
11684 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11685 { shmedia_builtin_p,
11686 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11687 { shmedia_builtin_p,
11688 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11689 { shmedia_builtin_p,
11690 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11691 { shmedia_builtin_p,
11692 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11693 { shmedia_builtin_p,
11694 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11695 { shmedia_builtin_p,
11696 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11697 { shmedia_builtin_p,
11698 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11699 { shmedia_builtin_p,
11700 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11701 { shmedia_builtin_p,
11702 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11703 { shmedia_builtin_p,
11704 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11705 { shmedia_builtin_p,
11706 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11707 { shmedia_builtin_p,
11708 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11709 { shmedia_builtin_p,
11710 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11711 { shmedia_builtin_p,
11712 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11713 { shmedia_builtin_p,
11714 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11715 { shmedia_builtin_p,
11716 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11717 { shmedia_builtin_p,
11718 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11719 { shmedia_builtin_p,
11720 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11721 { shmedia_builtin_p,
11722 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11723 { shmedia_builtin_p,
11724 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11725 { shmedia_builtin_p,
11726 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11727 { shmedia_builtin_p,
11728 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11729 { shmedia_builtin_p,
11730 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11731 { shmedia_builtin_p,
11732 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11733 { shmedia_builtin_p,
11734 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11735 { shmedia_builtin_p,
11736 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11737 { shmedia_builtin_p,
11738 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11739 { shmedia_builtin_p,
11740 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11741 { shmedia_builtin_p,
11742 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11743 { shmedia_builtin_p,
11744 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11745 { shmedia_builtin_p,
11746 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11747 { shmedia_builtin_p,
11748 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11749 { shmedia_builtin_p,
11750 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11751 { shmedia_builtin_p,
11752 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11753 { shmedia_builtin_p,
11754 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11755 { shmedia_builtin_p,
11756 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11757 { shmedia_builtin_p,
11758 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11759 { shmedia_builtin_p,
11760 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11761 { shmedia_builtin_p,
11762 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11763 { shmedia_builtin_p,
11764 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11765 { shmedia_builtin_p,
11766 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11767 { shmedia_builtin_p,
11768 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11769 { shmedia_builtin_p,
11770 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11771 { shmedia_builtin_p,
11772 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11773 { shmedia_builtin_p,
11774 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11775 { shmedia_builtin_p,
11776 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11777 { shmedia_builtin_p,
11778 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11779 { shmedia_builtin_p,
11780 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11781 { shmedia_builtin_p,
11782 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11783 { shmedia_builtin_p,
11784 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11785 { shmedia_builtin_p,
11786 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11787 { shmedia_builtin_p,
11788 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11789 { shmedia_builtin_p,
11790 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11791 { shmedia_builtin_p,
11792 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11793 { shmedia_builtin_p,
11794 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11795 { shmedia_builtin_p,
11796 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11797 { shmedia_builtin_p,
11798 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11799 { shmedia_builtin_p,
11800 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11801 { shmedia_builtin_p,
11802 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11803 { shmedia_builtin_p,
11804 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11805 { shmedia_builtin_p,
11806 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11807 { shmedia_builtin_p,
11808 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11809 { shmedia_builtin_p,
11810 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11811 { shmedia_builtin_p,
11812 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11813 { shmedia_builtin_p,
11814 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11815 { shmedia_builtin_p,
11816 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11817 { shmedia_builtin_p,
11818 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11819 { shmedia_builtin_p,
11820 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11821 { shmedia_builtin_p,
11822 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11823 { shmedia_builtin_p,
11824 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
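/* Walk BDESC and register each enabled builtin with the middle end.  The
   function types are built lazily from signature_args and cached in SHARED,
   so builtins with the same signature reuse a single type node.  */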
11827 static void
11828 sh_init_builtins (void)
11830 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11831 memset (shared, 0, sizeof shared);
11833 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11835 builtin_description* d = &bdesc[di];
11837 if (!d->is_enabled ())
11838 continue;
11840 tree type, arg_type = NULL_TREE;
11841 int signature = d->signature;
11843 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11844 type = shared[signature];
11845 else
11847 int has_result = signature_args[signature][0] != 0;
11848 tree args[3];
11850 if ((signature_args[signature][1] & 8)
11851 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11852 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11853 continue;
11854 if (! TARGET_FPU_ANY
11855 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11856 continue;
11857 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11858 args[i] = NULL_TREE;
11859 for (int i = 3; ; i--)
11861 int arg = signature_args[signature][i];
11862 int opno = i - 1 + has_result;
11864 if (arg & 8)
11865 arg_type = ptr_type_node;
11866 else if (arg)
11867 arg_type = (*lang_hooks.types.type_for_mode)
11868 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11869 else if (i)
11870 continue;
11871 else
11872 arg_type = void_type_node;
11873 if (i == 0)
11874 break;
11875 args[i-1] = arg_type;
11877 type = build_function_type_list (arg_type, args[0], args[1],
11878 args[2], NULL_TREE);
11879 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11880 shared[signature] = type;
11882 d->fndecl =
11883 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11884 NULL, NULL_TREE);
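/* Note that the function code passed to add_builtin_function above is the
   index of the entry in BDESC; sh_builtin_decl and sh_expand_builtin below
   use that same index to recover the insn code and signature.  */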
11888 /* Implements target hook vector_mode_supported_p. */
11889 bool
11890 sh_vector_mode_supported_p (enum machine_mode mode)
11892 if (TARGET_FPU_ANY
11893 && ((mode == V2SFmode)
11894 || (mode == V4SFmode)
11895 || (mode == V16SFmode)))
11896 return true;
11898 else if (TARGET_SHMEDIA
11899 && ((mode == V8QImode)
11900 || (mode == V2HImode)
11901 || (mode == V4HImode)
11902 || (mode == V2SImode)))
11903 return true;
11905 return false;
11908 bool
11909 sh_frame_pointer_required (void)
11911 /* If needed, override this in other tm.h files to cope with various OS
11912 lossage requiring a frame pointer. */
11913 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11914 return true;
11916 if (crtl->profile)
11917 return true;
11919 return false;
11922 /* Implements target hook dwarf_calling_convention. Return an enum
11923 of dwarf_calling_convention. */
11925 sh_dwarf_calling_convention (const_tree func)
11927 if (sh_attr_renesas_p (func))
11928 return DW_CC_GNU_renesas_sh;
11930 return DW_CC_normal;
11933 /* Returns the sh builtin decl for CODE. */
11935 static tree
11936 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11938 if (code >= ARRAY_SIZE (bdesc))
11939 return error_mark_node;
11941 if (!bdesc[code].is_enabled ())
11942 return error_mark_node;
11944 return bdesc[code].fndecl;
11947 /* Expand an expression EXP that calls a built-in function,
11948 with result going to TARGET if that's convenient
11949 (and in mode MODE if that's convenient).
11950 SUBTARGET may be used as the target for computing one of EXP's operands.
11951 IGNORE is nonzero if the value is to be ignored. */
11953 static rtx
11954 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11955 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11957 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11958 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11959 const struct builtin_description *d = &bdesc[fcode];
11960 enum insn_code icode = d->icode;
11961 int signature = d->signature;
11962 int nop = 0;
11963 rtx op[4];
11965 if (signature_args[signature][0])
11967 if (ignore)
11968 return NULL_RTX;
11970 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11971 if (! target || GET_MODE (target) != tmode
11972 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11973 target = gen_reg_rtx (tmode);
11974 op[nop++] = target;
11976 else
11977 target = NULL_RTX;
11979 for (int i = 1; i <= 3; i++, nop++)
11981 tree arg;
11982 enum machine_mode opmode, argmode;
11983 tree optype;
11985 if (! signature_args[signature][i])
11986 break;
11987 arg = CALL_EXPR_ARG (exp, i - 1);
11988 if (arg == error_mark_node)
11989 return const0_rtx;
11990 if (signature_args[signature][i] & 8)
11992 opmode = ptr_mode;
11993 optype = ptr_type_node;
11995 else
11997 opmode = insn_data[icode].operand[nop].mode;
11998 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12000 argmode = TYPE_MODE (TREE_TYPE (arg));
12001 if (argmode != opmode)
12002 arg = build1 (NOP_EXPR, optype, arg);
12003 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12004 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12005 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12008 rtx pat = NULL_RTX;
12010 switch (nop)
12012 case 1:
12013 pat = (*insn_data[d->icode].genfun) (op[0]);
12014 break;
12015 case 2:
12016 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12017 break;
12018 case 3:
12019 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12020 break;
12021 case 4:
12022 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12023 break;
12024 default:
12025 gcc_unreachable ();
12027 if (! pat)
12028 return NULL_RTX;
12029 emit_insn (pat);
12030 return target;
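/* Expand a V2SFmode unary operation CODE as two scalar SFmode operations,
   one per vector element; SEL0 / SEL1 pick the element that each emitted
   insn operates on.  */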
12033 void
12034 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12036 rtx sel0 = const0_rtx;
12037 rtx sel1 = const1_rtx;
12038 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12039 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12041 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12042 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
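/* Likewise for a V2SFmode binary operation CODE; the two emitted insns
   together cover both elements of the vector.  */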
12045 void
12046 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12048 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12050 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12051 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12054 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12055 We can allow any mode in any general register. The special registers
12056 only allow SImode. Don't allow any mode in the PR.
12058 We cannot hold DCmode values in the XD registers because alter_reg
12059 handles subregs of them incorrectly. We could work around this by
12060 spacing the XD registers like the DR registers, but this would require
12061 additional memory in every compilation to hold larger register vectors.
12062 We could hold SFmode / SCmode values in XD registers, but that
12063 would require a tertiary reload when reloading from / to memory,
12064 and a secondary reload to reload from / to general regs; that
12065 seems to be a losing proposition.
12067 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12068 it won't be ferried through GP registers first. */
12070 bool
12071 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
12073 if (SPECIAL_REGISTER_P (regno))
12074 return mode == SImode;
12076 if (regno == FPUL_REG)
12077 return (mode == SImode || mode == SFmode);
12079 if (FP_REGISTER_P (regno) && mode == SFmode)
12080 return true;
12082 if (mode == V2SFmode)
12084 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12085 || GENERAL_REGISTER_P (regno)))
12086 return true;
12087 else
12088 return false;
12091 if (mode == V4SFmode)
12093 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12094 || GENERAL_REGISTER_P (regno))
12095 return true;
12096 else
12097 return false;
12100 if (mode == V16SFmode)
12102 if (TARGET_SHMEDIA)
12104 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12105 return true;
12106 else
12107 return false;
12109 else
12110 return regno == FIRST_XD_REG;
12113 if (FP_REGISTER_P (regno))
12115 if (mode == SFmode
12116 || mode == SImode
12117 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12118 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12119 || mode == DCmode
12120 || (TARGET_SHMEDIA
12121 && (mode == DFmode || mode == DImode
12122 || mode == V2SFmode || mode == TImode)))
12123 && ((regno - FIRST_FP_REG) & 1) == 0)
12124 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12125 && ((regno - FIRST_FP_REG) & 3) == 0))
12126 return true;
12127 else
12128 return false;
12131 if (XD_REGISTER_P (regno))
12132 return mode == DFmode;
12134 if (TARGET_REGISTER_P (regno))
12135 return (mode == DImode || mode == SImode || mode == PDImode);
12137 if (regno == PR_REG)
12138 return mode == SImode;
12140 if (regno == FPSCR_REG)
12141 return mode == PSImode;
12143 /* FIXME. This works around PR target/37633 for -O0. */
12144 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12146 unsigned int n = GET_MODE_SIZE (mode) / 8;
12148 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12149 && regno <= FIRST_GENERAL_REG + 14)
12150 return false;
12153 return true;
12156 /* Return the class of registers for which a mode change from FROM to TO
12157 is invalid. */
12158 bool
12159 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12160 enum reg_class rclass)
12162 /* We want to enable the use of SUBREGs as a means to
12163 VEC_SELECT a single element of a vector. */
12165 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12166 This can be problematic when SFmode vector subregs need to be accessed
12167 on the stack with displacement addressing, as it happens with -O0.
12168 Thus we disallow the mode change for -O0. */
12169 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12170 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12172 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12174 if (TARGET_LITTLE_ENDIAN)
12176 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12177 return reg_classes_intersect_p (DF_REGS, rclass);
12179 else
12181 if (GET_MODE_SIZE (from) < 8)
12182 return reg_classes_intersect_p (DF_HI_REGS, rclass);
12185 return false;
12188 /* Return true if registers in machine mode MODE will likely be
12189 allocated to registers in small register classes. */
12191 bool
12192 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12194 return (! TARGET_SHMEDIA);
12197 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12198 that label is used. */
12200 void
12201 sh_mark_label (rtx address, int nuses)
12203 if (GOTOFF_P (address))
12205 /* Extract the label or symbol. */
12206 address = XEXP (address, 0);
12207 if (GET_CODE (address) == PLUS)
12208 address = XEXP (address, 0);
12209 address = XVECEXP (address, 0, 0);
12211 if (GET_CODE (address) == LABEL_REF
12212 && LABEL_P (XEXP (address, 0)))
12213 LABEL_NUSES (XEXP (address, 0)) += nuses;
12216 /* Compute extra cost of moving data between one register class
12217 and another. */
12219 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12220 uses this information. Hence, the general register <-> floating point
12221 register information here is not used for SFmode. */
12223 static int
12224 sh_register_move_cost (enum machine_mode mode,
12225 reg_class_t srcclass, reg_class_t dstclass)
12227 if (dstclass == T_REGS || dstclass == PR_REGS)
12228 return 10;
12230 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12231 return 4;
12233 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12234 && REGCLASS_HAS_FP_REG (srcclass)
12235 && REGCLASS_HAS_FP_REG (dstclass))
12236 return 4;
12238 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12239 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12241 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12242 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12243 return 9;
12245 if ((REGCLASS_HAS_FP_REG (dstclass)
12246 && REGCLASS_HAS_GENERAL_REG (srcclass))
12247 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12248 && REGCLASS_HAS_FP_REG (srcclass)))
12250 /* Discourage trying to use fp regs for a pointer. This also
12251 discourages fp regs with SImode because Pmode is an alias
12252 of SImode on this target. See PR target/48596. */
12253 int addend = (mode == Pmode) ? 40 : 0;
12255 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12256 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12259 if ((dstclass == FPUL_REGS
12260 && REGCLASS_HAS_GENERAL_REG (srcclass))
12261 || (srcclass == FPUL_REGS
12262 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12263 return 5;
12265 if ((dstclass == FPUL_REGS
12266 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12267 || (srcclass == FPUL_REGS
12268 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12269 return 7;
12271 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12272 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12273 return 20;
12275 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12276 if (TARGET_SHMEDIA
12277 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12279 if (sh_gettrcost >= 0)
12280 return sh_gettrcost;
12281 else if (!TARGET_PT_FIXED)
12282 return 100;
12285 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12286 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12287 return 4;
12289 if (TARGET_SHMEDIA
12290 || (TARGET_FMOVD
12291 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12292 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12293 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12295 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
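/* Load a pointer-sized value from ADDR into REG, sign-extending it from
   ptr_mode to Pmode when the two modes differ.  */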
12298 static rtx emit_load_ptr (rtx, rtx);
12300 static rtx
12301 emit_load_ptr (rtx reg, rtx addr)
12303 rtx mem = gen_const_mem (ptr_mode, addr);
12305 if (Pmode != ptr_mode)
12306 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12307 return emit_move_insn (reg, mem);
12310 static void
12311 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12312 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12313 tree function)
12315 CUMULATIVE_ARGS cum;
12316 int structure_value_byref = 0;
12317 rtx this_rtx, this_value, sibcall, insns, funexp;
12318 tree funtype = TREE_TYPE (function);
12319 int simple_add = CONST_OK_FOR_ADD (delta);
12320 int did_load = 0;
12321 rtx scratch0, scratch1, scratch2;
12322 unsigned i;
12324 reload_completed = 1;
12325 epilogue_completed = 1;
12326 crtl->uses_only_leaf_regs = 1;
12328 emit_note (NOTE_INSN_PROLOGUE_END);
12330 /* Find the "this" pointer. We have such a wide range of ABIs for the
12331 SH that it's best to do this completely machine independently.
12332 "this" is passed as first argument, unless a structure return pointer
12333 comes first, in which case "this" comes second. */
12334 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12335 #ifndef PCC_STATIC_STRUCT_RETURN
12336 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12337 structure_value_byref = 1;
12338 #endif /* not PCC_STATIC_STRUCT_RETURN */
12339 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12341 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12343 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12345 this_rtx
12346 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12348 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12349 static chain pointer (even if you can't have nested virtual functions
12350 right now, someone might implement them sometime), and the rest of the
12351 registers are used for argument passing, are callee-saved, or reserved. */
12352 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12353 -ffixed-reg has been used. */
12354 if (! call_used_regs[0] || fixed_regs[0])
12355 error ("r0 needs to be available as a call-clobbered register");
12356 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12357 if (! TARGET_SH5)
12359 if (call_used_regs[1] && ! fixed_regs[1])
12360 scratch1 = gen_rtx_REG (ptr_mode, 1);
12361 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12362 to the location where struct values are returned. */
12363 if (call_used_regs[3] && ! fixed_regs[3])
12364 scratch2 = gen_rtx_REG (Pmode, 3);
12366 else if (TARGET_SHMEDIA)
12368 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12369 if (i != REGNO (scratch0) &&
12370 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12372 scratch1 = gen_rtx_REG (ptr_mode, i);
12373 break;
12375 if (scratch1 == scratch0)
12376 error ("need a second call-clobbered general purpose register");
12377 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12378 if (call_used_regs[i] && ! fixed_regs[i])
12380 scratch2 = gen_rtx_REG (Pmode, i);
12381 break;
12383 if (scratch2 == scratch0)
12384 error ("need a call-clobbered target register");
12387 this_value = plus_constant (Pmode, this_rtx, delta);
12388 if (vcall_offset
12389 && (simple_add || scratch0 != scratch1)
12390 && strict_memory_address_p (ptr_mode, this_value))
12392 emit_load_ptr (scratch0, this_value);
12393 did_load = 1;
12396 if (!delta)
12397 ; /* Do nothing. */
12398 else if (simple_add)
12399 emit_move_insn (this_rtx, this_value);
12400 else
12402 emit_move_insn (scratch1, GEN_INT (delta));
12403 emit_insn (gen_add2_insn (this_rtx, scratch1));
12406 if (vcall_offset)
12408 rtx offset_addr;
12410 if (!did_load)
12411 emit_load_ptr (scratch0, this_rtx);
12413 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12414 if (strict_memory_address_p (ptr_mode, offset_addr))
12415 ; /* Do nothing. */
12416 else if (! TARGET_SH5 && scratch0 != scratch1)
12418 /* scratch0 != scratch1, and we have indexed loads. Get better
12419 schedule by loading the offset into r1 and using an indexed
12420 load - then the load of r1 can issue before the load from
12421 (this_rtx + delta) finishes. */
12422 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12423 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12425 else if (CONST_OK_FOR_ADD (vcall_offset))
12427 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12428 offset_addr = scratch0;
12430 else if (scratch0 != scratch1)
12432 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12433 emit_insn (gen_add2_insn (scratch0, scratch1));
12434 offset_addr = scratch0;
12436 else
12437 gcc_unreachable (); /* FIXME */
12438 emit_load_ptr (scratch0, offset_addr);
12440 if (Pmode != ptr_mode)
12441 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12442 emit_insn (gen_add2_insn (this_rtx, scratch0));
12445 /* Generate a tail call to the target function. */
12446 if (! TREE_USED (function))
12448 assemble_external (function);
12449 TREE_USED (function) = 1;
12451 funexp = XEXP (DECL_RTL (function), 0);
12452 /* If the function is overridden, so is the thunk, hence we don't
12453 need GOT addressing even if this is a public symbol. */
12454 #if 0
12455 if (TARGET_SH1 && ! flag_weak)
12456 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12457 else
12458 #endif
12459 if (TARGET_SH2 && flag_pic)
12461 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12462 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12464 else
12466 if (TARGET_SHMEDIA && flag_pic)
12468 funexp = gen_sym2PIC (funexp);
12469 PUT_MODE (funexp, Pmode);
12471 emit_move_insn (scratch2, funexp);
12472 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12473 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12475 sibcall = emit_call_insn (sibcall);
12476 SIBLING_CALL_P (sibcall) = 1;
12477 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12478 emit_barrier ();
12480 /* Run just enough of rest_of_compilation to do scheduling and get
12481 the insns emitted. Note that use_thunk calls
12482 assemble_start_function and assemble_end_function. */
12484 insns = get_insns ();
12486 if (optimize > 0)
12488 if (! cfun->cfg)
12489 init_flow (cfun);
12490 split_all_insns_noflow ();
12493 sh_reorg ();
12494 shorten_branches (insns);
12495 final_start_function (insns, file, 1);
12496 final (insns, file, 1);
12497 final_end_function ();
12499 reload_completed = 0;
12500 epilogue_completed = 0;
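/* Return an rtx holding the address of the function NAME.  For an ordinary
   function the SYMBOL_REF itself is used; when PIC is enabled, SFUNC_GOT and
   SFUNC_STATIC symbols are first loaded into TARGET (or a fresh pseudo)
   through the GOT or via a GOTOFF relocation.  */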
12504 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12506 rtx sym;
12508 /* If this is not an ordinary function, the name usually comes from a
12509 string literal or an sprintf buffer. Make sure we use the same
12510 string consistently, so that cse will be able to unify address loads. */
12511 if (kind != FUNCTION_ORDINARY)
12512 name = IDENTIFIER_POINTER (get_identifier (name));
12513 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12514 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12515 if (flag_pic)
12516 switch (kind)
12518 case FUNCTION_ORDINARY:
12519 break;
12520 case SFUNC_GOT:
12522 rtx reg = target ? target : gen_reg_rtx (Pmode);
12524 emit_insn (gen_symGOT2reg (reg, sym));
12525 sym = reg;
12526 break;
12528 case SFUNC_STATIC:
12530 /* ??? To allow cse to work, we use GOTOFF relocations.
12531 We could add combiner patterns to transform this into
12532 straight pc-relative calls with sym2PIC / bsrf when
12533 label load and function call are still 1:1 and in the
12534 same basic block during combine. */
12535 rtx reg = target ? target : gen_reg_rtx (Pmode);
12537 emit_insn (gen_symGOTOFF2reg (reg, sym));
12538 sym = reg;
12539 break;
12542 if (target && sym != target)
12544 emit_move_insn (target, sym);
12545 return target;
12547 return sym;
12550 /* Find the number of a general purpose register in S. */
12551 static int
12552 scavenge_reg (HARD_REG_SET *s)
12554 int r;
12555 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12556 if (TEST_HARD_REG_BIT (*s, r))
12557 return r;
12558 return -1;
12562 sh_get_pr_initial_val (void)
12564 rtx val;
12566 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12567 PR register on SHcompact, because it might be clobbered by the prologue.
12568 We check first if that is known to be the case. */
12569 if (TARGET_SHCOMPACT
12570 && ((crtl->args.info.call_cookie
12571 & ~ CALL_COOKIE_RET_TRAMP (1))
12572 || crtl->saves_all_registers))
12573 return gen_frame_mem (SImode, return_address_pointer_rtx);
12575 /* If we haven't finished rtl generation, there might be a nonlocal label
12576 that we haven't seen yet.
12577 ??? get_hard_reg_initial_val fails if it is called after register
12578 allocation has started, unless it has been called before for the
12579 same register. And even then, we end up in trouble if we didn't use
12580 the register in the same basic block before. So call
12581 get_hard_reg_initial_val now and wrap it in an unspec if we might
12582 need to replace it. */
12583 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12584 combine can put the pseudo returned by get_hard_reg_initial_val into
12585 instructions that need a general purpose register, which will fail to
12586 be recognized when the pseudo becomes allocated to PR. */
12588 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12589 if (TARGET_SH1)
12590 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12591 return val;
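/* Expand an scc insn whose comparison operand is the T bit itself:
   operands[2] must be the T register and operands[3] a constant.  For EQ/NE
   this reduces to a movt, a negated movt, or a constant load; any other
   comparison code is rejected.  */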
12594 bool
12595 sh_expand_t_scc (rtx operands[])
12597 enum rtx_code code = GET_CODE (operands[1]);
12598 rtx target = operands[0];
12599 rtx op0 = operands[2];
12600 rtx op1 = operands[3];
12601 rtx result = target;
12602 HOST_WIDE_INT val;
12604 if (!REG_P (op0) || REGNO (op0) != T_REG
12605 || !CONST_INT_P (op1))
12606 return false;
12607 if (!REG_P (result))
12608 result = gen_reg_rtx (SImode);
12609 val = INTVAL (op1);
12610 if ((code == EQ && val == 1) || (code == NE && val == 0))
12611 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12612 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12613 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12614 else if (code == EQ || code == NE)
12615 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12616 else
12617 return false;
12618 if (result != target)
12619 emit_move_insn (target, result);
12620 return true;
12623 /* INSN is an sfunc; return the rtx that describes the address used. */
12624 static rtx
12625 extract_sfunc_addr (rtx insn)
12627 rtx pattern, part = NULL_RTX;
12628 int len, i;
12630 pattern = PATTERN (insn);
12631 len = XVECLEN (pattern, 0);
12632 for (i = 0; i < len; i++)
12634 part = XVECEXP (pattern, 0, i);
12635 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12636 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12637 return XEXP (part, 0);
12639 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12640 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12643 /* Verify that the register in use_sfunc_addr still agrees with the address
12644 used in the sfunc. This prevents fill_slots_from_thread from changing
12645 use_sfunc_addr.
12646 INSN is the use_sfunc_addr instruction, and REG is the register it
12647 guards. */
12648 bool
12649 check_use_sfunc_addr (rtx insn, rtx reg)
12651 /* Search for the sfunc. It should really come right after INSN. */
12652 while ((insn = NEXT_INSN (insn)))
12654 if (LABEL_P (insn) || JUMP_P (insn))
12655 break;
12656 if (! INSN_P (insn))
12657 continue;
12659 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12660 insn = XVECEXP (PATTERN (insn), 0, 0);
12661 if (GET_CODE (PATTERN (insn)) != PARALLEL
12662 || get_attr_type (insn) != TYPE_SFUNC)
12663 continue;
12664 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12666 gcc_unreachable ();
12669 /* This function returns a constant rtx that represents 2**15 / pi in
12670 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12671 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12672 static GTY(()) rtx sh_fsca_sf2int_rtx;
12675 sh_fsca_sf2int (void)
12677 if (! sh_fsca_sf2int_rtx)
12679 REAL_VALUE_TYPE rv;
12681 real_from_string (&rv, "10430.378350470453");
12682 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12685 return sh_fsca_sf2int_rtx;
12688 /* This function returns a constant rtx that represents pi / 2**15 in
12689 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12690 of a full circle back to an SFmode value in radians, i.e. 0x10000
12691 maps to 2*pi. */
12692 static GTY(()) rtx sh_fsca_int2sf_rtx;
12695 sh_fsca_int2sf (void)
12697 if (! sh_fsca_int2sf_rtx)
12699 REAL_VALUE_TYPE rv;
12701 real_from_string (&rv, "9.587379924285257e-5");
12702 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12705 return sh_fsca_int2sf_rtx;
12708 /* Initialize the CUMULATIVE_ARGS structure. */
12710 void
12711 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12712 tree fntype,
12713 rtx libname ATTRIBUTE_UNUSED,
12714 tree fndecl,
12715 signed int n_named_args,
12716 enum machine_mode mode)
12718 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12719 pcum->free_single_fp_reg = 0;
12720 pcum->stack_regs = 0;
12721 pcum->byref_regs = 0;
12722 pcum->byref = 0;
12723 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12725 /* XXX - Should we check TARGET_HITACHI here ??? */
12726 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12728 if (fntype)
12730 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12731 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12732 pcum->prototype_p = prototype_p (fntype);
12733 pcum->arg_count [(int) SH_ARG_INT]
12734 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12736 pcum->call_cookie
12737 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12738 && pcum->arg_count [(int) SH_ARG_INT] == 0
12739 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12740 ? int_size_in_bytes (TREE_TYPE (fntype))
12741 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12742 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12743 == FIRST_RET_REG));
12745 else
12747 pcum->arg_count [(int) SH_ARG_INT] = 0;
12748 pcum->prototype_p = FALSE;
12749 if (mode != VOIDmode)
12751 pcum->call_cookie =
12752 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12753 && GET_MODE_SIZE (mode) > 4
12754 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12756 /* If the default ABI is the Renesas ABI then all library
12757 calls must assume that the library will be using the
12758 Renesas ABI. So if the function would return its result
12759 in memory then we must force the address of this memory
12760 block onto the stack. Ideally we would like to call
12761 targetm.calls.return_in_memory() here but we do not have
12762 the TYPE or the FNDECL available so we synthesize the
12763 contents of that function as best we can. */
12764 pcum->force_mem =
12765 (TARGET_DEFAULT & MASK_HITACHI)
12766 && (mode == BLKmode
12767 || (GET_MODE_SIZE (mode) > 4
12768 && !(mode == DFmode
12769 && TARGET_FPU_DOUBLE)));
12771 else
12773 pcum->call_cookie = 0;
12774 pcum->force_mem = FALSE;
12779 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12780 not descend into CONST_DOUBLEs when replacing.
12782 Note that no copying is done, so X must not be shared unless all copies
12783 are to be modified.
12785 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12786 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12787 replacements[n*2+1] - and that we take mode changes into account.
12789 If a replacement is ambiguous, return NULL_RTX.
12791 If MODIFY is zero, don't modify any rtl in place,
12792 just return zero or nonzero for failure / success. */
12795 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12797 int i, j;
12798 const char *fmt;
12800 /* The following prevents infinite loops when we change a MEM in a
12801 CONST_DOUBLE into the same CONST_DOUBLE. */
12802 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12803 return x;
12805 for (i = n_replacements - 1; i >= 0 ; i--)
12806 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12807 return replacements[i*2+1];
12809 /* Allow this function to make replacements in EXPR_LISTs. */
12810 if (x == NULL_RTX)
12811 return NULL_RTX;
12813 if (GET_CODE (x) == SUBREG)
12815 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12816 n_replacements, modify);
12818 if (CONST_INT_P (new_rtx))
12820 x = simplify_subreg (GET_MODE (x), new_rtx,
12821 GET_MODE (SUBREG_REG (x)),
12822 SUBREG_BYTE (x));
12823 if (! x)
12824 abort ();
12826 else if (modify)
12827 SUBREG_REG (x) = new_rtx;
12829 return x;
12831 else if (REG_P (x))
12833 unsigned regno = REGNO (x);
12834 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12835 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12836 rtx result = NULL_RTX;
12838 for (i = n_replacements - 1; i >= 0; i--)
12840 rtx from = replacements[i*2];
12841 rtx to = replacements[i*2+1];
12842 unsigned from_regno, from_nregs, to_regno, new_regno;
12844 if (!REG_P (from))
12845 continue;
12846 from_regno = REGNO (from);
12847 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12848 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12849 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12851 if (regno < from_regno
12852 || regno + nregs > from_regno + nregs
12853 || !REG_P (to)
12854 || result)
12855 return NULL_RTX;
12856 to_regno = REGNO (to);
12857 if (to_regno < FIRST_PSEUDO_REGISTER)
12859 new_regno = regno + to_regno - from_regno;
12860 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12861 != nregs)
12862 return NULL_RTX;
12863 result = gen_rtx_REG (GET_MODE (x), new_regno);
12865 else if (GET_MODE (x) <= GET_MODE (to))
12866 result = gen_lowpart_common (GET_MODE (x), to);
12867 else
12868 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12871 return result ? result : x;
12873 else if (GET_CODE (x) == ZERO_EXTEND)
12875 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12876 n_replacements, modify);
12878 if (CONST_INT_P (new_rtx))
12880 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12881 new_rtx, GET_MODE (XEXP (x, 0)));
12882 if (! x)
12883 abort ();
12885 else if (modify)
12886 XEXP (x, 0) = new_rtx;
12888 return x;
12891 fmt = GET_RTX_FORMAT (GET_CODE (x));
12892 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12894 rtx new_rtx;
12896 if (fmt[i] == 'e')
12898 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12899 n_replacements, modify);
12900 if (!new_rtx)
12901 return NULL_RTX;
12902 if (modify)
12903 XEXP (x, i) = new_rtx;
12905 else if (fmt[i] == 'E')
12906 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12908 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12909 n_replacements, modify);
12910 if (!new_rtx)
12911 return NULL_RTX;
12912 if (modify)
12913 XVECEXP (x, i, j) = new_rtx;
12917 return x;
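/* Build a truncation of X to MODE.  If X is itself a zero or sign
   extension, try to drop or fold the extension instead of stacking a
   TRUNCATE on top of it; NEED_SIGN_EXT restricts this folding to sign
   extensions when the caller needs the upper bits sign-replicated.  */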
12921 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12923 enum rtx_code code = TRUNCATE;
12925 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12927 rtx inner = XEXP (x, 0);
12928 enum machine_mode inner_mode = GET_MODE (inner);
12930 if (inner_mode == mode)
12931 return inner;
12932 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12933 x = inner;
12934 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12935 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12937 code = GET_CODE (x);
12938 x = inner;
12941 return gen_rtx_fmt_e (code, mode, x);
12944 /* Called via for_each_rtx after reload, to clean up truncates of
12945 registers that span multiple actual hard registers. */
12947 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12949 rtx x = *p, reg;
12951 if (GET_CODE (x) != TRUNCATE)
12952 return 0;
12953 reg = XEXP (x, 0);
12954 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12956 enum machine_mode reg_mode = GET_MODE (reg);
12957 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12958 subreg_lowpart_offset (DImode, reg_mode));
12959 *(int*) n_changes += 1;
12960 return -1;
12962 return 0;
12965 /* Load and store depend on the highpart of the address. However,
12966 set_attr_alternative does not give well-defined results before reload,
12967 so we must look at the rtl ourselves to see if any of the feeding
12968 registers is used in a memref. */
12970 /* Called by sh_contains_memref_p via for_each_rtx. */
12971 static int
12972 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12974 return (MEM_P (*loc));
12977 /* Return true iff INSN contains a MEM. */
12978 bool
12979 sh_contains_memref_p (rtx insn)
12981 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12984 /* Return true iff INSN loads a banked register. */
12985 bool
12986 sh_loads_bankedreg_p (rtx insn)
12988 if (GET_CODE (PATTERN (insn)) == SET)
12990 rtx op = SET_DEST (PATTERN(insn));
12991 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12992 return true;
12995 return false;
12998 /* FNADDR is the MEM expression from a call expander. Return an address
12999 to use in an SHmedia insn pattern. */
13001 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13003 int is_sym;
13005 fnaddr = XEXP (fnaddr, 0);
13006 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13007 if (flag_pic && is_sym)
13009 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13011 rtx reg = gen_reg_rtx (Pmode);
13013 /* We must not use GOTPLT for sibcalls, because PIC_REG
13014 must be restored before the PLT code gets to run. */
13015 if (is_sibcall)
13016 emit_insn (gen_symGOT2reg (reg, fnaddr));
13017 else
13018 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13019 fnaddr = reg;
13021 else
13023 fnaddr = gen_sym2PIC (fnaddr);
13024 PUT_MODE (fnaddr, Pmode);
13027 /* If ptabs might trap, make this visible to the rest of the compiler.
13028 We generally assume that symbols pertain to valid locations, but
13029 it is possible to generate invalid symbols with asm or linker tricks.
13030 In a list of functions where each returns its successor, an invalid
13031 symbol might denote an empty list. */
13032 if (!TARGET_PT_FIXED
13033 && (!is_sym || TARGET_INVALID_SYMBOLS)
13034 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13036 rtx tr = gen_reg_rtx (PDImode);
13038 emit_insn (gen_ptabs (tr, fnaddr));
13039 fnaddr = tr;
13041 else if (! target_reg_operand (fnaddr, Pmode))
13042 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13043 return fnaddr;
13046 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13048 static reg_class_t
13049 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13051 if (rclass == NO_REGS
13052 && TARGET_SHMEDIA
13053 && (CONST_DOUBLE_P (x)
13054 || GET_CODE (x) == SYMBOL_REF
13055 || PIC_ADDR_P (x)))
13056 return GENERAL_REGS;
13058 return rclass;
13061 /* Implement TARGET_SECONDARY_RELOAD. */
13063 static reg_class_t
13064 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13065 enum machine_mode mode, secondary_reload_info *sri)
13067 enum reg_class rclass = (enum reg_class) rclass_i;
13069 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13070 && REG_P (XEXP (XEXP (x, 0), 0))
13071 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13072 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13074 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13075 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13077 if (REG_P (x) && REGNO (x) == GBR_REG)
13078 return NO_REGS;
13080 if (in_p)
13082 if (REGCLASS_HAS_FP_REG (rclass)
13083 && ! TARGET_SHMEDIA
13084 && immediate_operand ((x), mode)
13085 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13086 && mode == SFmode && fldi_ok ()))
13087 switch (mode)
13089 case SFmode:
13090 sri->icode = CODE_FOR_reload_insf__frn;
13091 return NO_REGS;
13092 case DFmode:
13093 sri->icode = CODE_FOR_reload_indf__frn;
13094 return NO_REGS;
13095 case SImode:
13096 /* ??? If we knew that we are in the appropriate mode -
13097 single precision - we could use a reload pattern directly. */
13098 return FPUL_REGS;
13099 default:
13100 abort ();
13102 if (rclass == FPUL_REGS
13103 && ((REG_P (x)
13104 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13105 || REGNO (x) == T_REG))
13106 || GET_CODE (x) == PLUS))
13107 return GENERAL_REGS;
13108 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13110 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13111 return GENERAL_REGS;
13112 else if (mode == SFmode)
13113 return FP_REGS;
13114 sri->icode = CODE_FOR_reload_insi__i_fpul;
13115 return NO_REGS;
13117 if (rclass == FPSCR_REGS
13118 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13119 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13120 return GENERAL_REGS;
13121 if (REGCLASS_HAS_FP_REG (rclass)
13122 && TARGET_SHMEDIA
13123 && immediate_operand (x, mode)
13124 && x != CONST0_RTX (GET_MODE (x))
13125 && GET_MODE (x) != V4SFmode)
13126 return GENERAL_REGS;
13127 if ((mode == QImode || mode == HImode)
13128 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13130 sri->icode = ((mode == QImode)
13131 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13132 return NO_REGS;
13134 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13135 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13136 return TARGET_REGS;
13137 } /* end of input-only processing. */
13139 if (((REGCLASS_HAS_FP_REG (rclass)
13140 && (REG_P (x)
13141 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13142 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13143 && TARGET_FMOVD))))
13144 || (REGCLASS_HAS_GENERAL_REG (rclass)
13145 && REG_P (x)
13146 && FP_REGISTER_P (REGNO (x))))
13147 && ! TARGET_SHMEDIA
13148 && (mode == SFmode || mode == SImode))
13149 return FPUL_REGS;
13150 if ((rclass == FPUL_REGS
13151 || (REGCLASS_HAS_FP_REG (rclass)
13152 && ! TARGET_SHMEDIA && mode == SImode))
13153 && (MEM_P (x)
13154 || (REG_P (x)
13155 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13156 || REGNO (x) == T_REG
13157 || system_reg_operand (x, VOIDmode)))))
13159 if (rclass == FPUL_REGS)
13160 return GENERAL_REGS;
13161 return FPUL_REGS;
13163 if ((rclass == TARGET_REGS
13164 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13165 && !satisfies_constraint_Csy (x)
13166 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13167 return GENERAL_REGS;
13168 if ((rclass == MAC_REGS || rclass == PR_REGS)
13169 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13170 && rclass != REGNO_REG_CLASS (REGNO (x)))
13171 return GENERAL_REGS;
13172 if (rclass != GENERAL_REGS && REG_P (x)
13173 && TARGET_REGISTER_P (REGNO (x)))
13174 return GENERAL_REGS;
13176 /* If we get here, fall back to loading the FPUL register through general
13177 registers. This case can happen when the movsi_ie insn is picked initially
13178 to load/store the FPUL register from/to another register, and then the
13179 other register is allocated on the stack. */
13180 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13181 return GENERAL_REGS;
13183 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13184 the other operand.
13185 On SH2A we could also just leave it alone here, which would result in a
13186 4 byte move insn being generated instead. However, for this to work
13187 the insns must have the appropriate alternatives. */
13188 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13189 && satisfies_constraint_Sdd (x)
13190 && disp_addr_displacement (x) <= max_mov_insn_displacement (mode, false))
13191 return R0_REGS;
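/* Illustration: the short displacement forms for byte and word moves are
   'mov.b @(disp,Rn),R0' and 'mov.b R0,@(disp,Rn)' (likewise mov.w), so
   the reloaded value has to end up in R0 when such an insn is used.  */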
13193 /* When reload is trying to address a QImode or HImode subreg on the stack,
13194 force any subreg byte into R0_REGS, as this is going to become a
13195 displacement address.
13196 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13197 is on the stack, the memref to it might already require a displacement
13198 and that has to be added to the final address. At this point we don't
13199 know the cumulative displacement so we assume the worst case. */
13200 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13201 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13202 return R0_REGS;
13204 return NO_REGS;
13207 static void
13208 sh_conditional_register_usage (void)
13210 int regno;
13211 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13212 if (! VALID_REGISTER_P (regno))
13213 fixed_regs[regno] = call_used_regs[regno] = 1;
13214 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13215 if (TARGET_SH5)
13217 call_used_regs[FIRST_GENERAL_REG + 8]
13218 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13219 call_really_used_regs[FIRST_GENERAL_REG + 8]
13220 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13222 if (TARGET_SHMEDIA)
13224 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13225 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13226 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13228 if (flag_pic)
13230 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13231 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13233 /* Renesas saves and restores mac registers on call. */
13234 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13236 call_really_used_regs[MACH_REG] = 0;
13237 call_really_used_regs[MACL_REG] = 0;
13239 for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0);
13240 regno <= LAST_FP_REG; regno += 2)
13241 SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno);
13242 if (TARGET_SHMEDIA)
13244 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13245 if (! fixed_regs[regno] && call_really_used_regs[regno])
13246 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13248 else
13249 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13250 if (! fixed_regs[regno] && call_really_used_regs[regno])
13251 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13254 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13256 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13258 static bool
13259 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13261 return (TARGET_SHMEDIA
13262 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13263 || x == CONST0_RTX (mode)
13264 || !TARGET_SHMEDIA_FPU
13265 || TARGET_SHMEDIA64)
13266 : (GET_CODE (x) != CONST_DOUBLE
13267 || mode == DFmode || mode == SFmode
13268 || mode == DImode || GET_MODE (x) == VOIDmode));
13271 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13273 static void
13274 sh_init_sync_libfuncs (void)
13276 init_sync_libfuncs (UNITS_PER_WORD);
13279 /* Return true if it is appropriate to emit `ret' instructions in the
13280 body of a function. */
13282 bool
13283 sh_can_use_simple_return_p (void)
13285 HARD_REG_SET live_regs_mask;
13286 int d;
13288 /* Some targets require special return insns. */
13289 if (TARGET_SHMEDIA
13290 || (TARGET_SHCOMPACT
13291 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13292 return false;
13294 if (! reload_completed || frame_pointer_needed)
13295 return false;
13297 /* Moving the prologue around doesn't reduce the size. */
13298 if (optimize_function_for_size_p (cfun))
13299 return false;
13301 /* Can't optimize CROSSING_JUMPS for now. */
13302 if (flag_reorder_blocks_and_partition)
13303 return false;
13305 /* Finally, allow for pr save. */
13306 d = calc_live_regs (&live_regs_mask);
13308 if (rounded_frame_size (d) > 4)
13309 return false;
13311 return true;
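/* Illustration: with the check above, a simple return is only used when
   the remaining frame is at most 4 bytes, i.e. essentially just a
   possible PR save slot.  */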
13314 /*------------------------------------------------------------------------------
13315 Address mode optimization support code.  */
13318 typedef HOST_WIDE_INT disp_t;
13319 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13320 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13321 static const disp_t INVALID_DISP = MAX_DISP;
13323 /* A memory reference which is described by a base register and a
13324 displacement. */
13325 class base_reg_disp
13327 public:
13328 base_reg_disp (rtx br, disp_t d);
13330 bool is_reg (void) const;
13331 bool is_disp (void) const;
13332 rtx reg (void) const;
13333 disp_t disp (void) const;
13335 private:
13336 rtx reg_;
13337 disp_t disp_;
13340 inline
13341 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13342 : reg_ (br), disp_ (d)
13346 inline bool
13347 base_reg_disp::is_reg (void) const
13349 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13352 inline bool
13353 base_reg_disp::is_disp (void) const
13355 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13358 inline rtx
13359 base_reg_disp::reg (void) const
13361 return reg_;
13364 inline disp_t
13365 base_reg_disp::disp (void) const
13367 return disp_;
13370 /* Find the base register and calculate the displacement for a given
13371 address rtx 'x'.
13372 This is done by walking the insn list backwards and following SET insns
13373 that set the value of the specified reg 'x'. */
13374 static base_reg_disp
13375 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13377 if (REG_P (x))
13379 if (REGNO (x) == GBR_REG)
13380 return base_reg_disp (x, disp);
13382 /* We've reached a hard-reg. This is probably the point where
13383 function args are copied to pseudos. Do not go any further; stick
13384 to the pseudo. If the original mem addr was in a hard reg
13385 from the beginning, it will become the base reg. */
13386 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13387 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13389 /* Try to find the previous insn that sets the reg. */
13390 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13391 i = prev_nonnote_insn (i))
13393 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13394 && CALL_P (i))
13395 break;
13397 if (!NONJUMP_INSN_P (i))
13398 continue;
13400 rtx p = PATTERN (i);
13401 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13402 && REGNO (XEXP (p, 0)) == REGNO (x))
13404 /* If the recursion can't find out any more details about the
13405 source of the set, then this reg becomes our new base reg. */
13406 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13410 /* If we get here, no previous insn was found that sets the reg.
13411 The input reg is already the base reg. */
13412 return base_reg_disp (x, disp);
13415 else if (GET_CODE (x) == PLUS)
13417 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13418 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13420 /* Either left or right val must be a reg.
13421 We don't handle the case of 'reg + reg' here. */
13422 if (left_val.is_reg () && right_val.is_disp ())
13423 return base_reg_disp (left_val.reg (), left_val.disp ()
13424 + right_val.disp () + disp);
13425 else if (right_val.is_reg () && left_val.is_disp ())
13426 return base_reg_disp (right_val.reg (), right_val.disp ()
13427 + left_val.disp () + disp);
13428 else
13429 return base_reg_disp (base_reg, disp);
13432 else if (CONST_INT_P (x))
13433 return base_reg_disp (NULL, disp + INTVAL (x));
13435 /* Didn't find anything useful. */
13436 return base_reg_disp (base_reg, disp);
13439 /* Given an insn and a memory operand, try to find an equivalent GBR
13440 based memory address and return the corresponding new memory address.
13441 Return NULL_RTX if not found. */
13442 rtx
13443 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13445 if (!MEM_P (mem))
13446 return NULL_RTX;
13448 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13449 if (side_effects_p (XEXP (mem, 0)))
13450 return NULL_RTX;
13452 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13454 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13456 rtx disp = GEN_INT (gbr_disp.disp ());
13457 if (gbr_displacement (disp, GET_MODE (mem)))
13458 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13461 return NULL_RTX;
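/* Illustration: if the address of 'mem' is 'rX + 4' for a pseudo rX that
   was previously set from 'GBR + 20', the code above returns the
   equivalent address 'GBR + 24', provided that 24 is a valid GBR
   displacement for the access mode; otherwise NULL_RTX is returned.  */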
13464 /*------------------------------------------------------------------------------
13465 Manual insn combine support code.  */
13468 /* Given a reg rtx and a start insn, try to find the insn that sets the
13469 specified reg by using the specified insn stepping function, such as
13470 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13471 of the reg set. */
13472 set_of_reg
13473 sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx))
13475 set_of_reg result;
13476 result.insn = insn;
13477 result.set_rtx = NULL_RTX;
13478 result.set_src = NULL_RTX;
13480 if (!REG_P (reg) || insn == NULL_RTX)
13481 return result;
13483 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13484 result.insn = stepfunc (result.insn))
13486 if (LABEL_P (result.insn) || BARRIER_P (result.insn))
13487 return result;
13488 if (!NONJUMP_INSN_P (result.insn))
13489 continue;
13490 if (reg_set_p (reg, result.insn))
13492 result.set_rtx = set_of (reg, result.insn);
13494 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13495 return result;
13497 result.set_src = XEXP (result.set_rtx, 1);
13498 return result;
13502 return result;
13505 /* Given an op rtx and an insn, try to find out whether the result of the
13506 specified op consists only of logical operations on T bit stores. */
13507 bool
13508 sh_is_logical_t_store_expr (rtx op, rtx insn)
13510 if (!logical_operator (op, SImode))
13511 return false;
13513 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13514 int op_is_t_count = 0;
13516 for (int i = 0; i < 2; ++i)
13518 if (t_reg_operand (ops[i], VOIDmode)
13519 || negt_reg_operand (ops[i], VOIDmode))
13520 op_is_t_count++;
13522 else
13524 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13525 prev_nonnote_insn_bb);
13526 if (op_set.set_src == NULL_RTX)
13527 continue;
13529 if (t_reg_operand (op_set.set_src, VOIDmode)
13530 || negt_reg_operand (op_set.set_src, VOIDmode)
13531 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13532 op_is_t_count++;
13536 return op_is_t_count == 2;
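/* Illustration: an expression such as '(xor (reg A) (reg B))' qualifies
   when both A and B were set from the T bit (or its negation), either
   directly or through further logical operations on such values.  */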
13539 /* Given the operand that is extended in a sign/zero extend insn, and the
13540 insn, try to figure out whether the sign/zero extension can be replaced
13541 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13542 NULL_RTX otherwise. */
13543 rtx
13544 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13546 if (REG_P (extended_op))
13547 extended_op = extended_op;
13548 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13549 extended_op = SUBREG_REG (extended_op);
13550 else
13551 return NULL_RTX;
13553 /* Reg moves must be of the same mode. */
13554 if (GET_MODE (extended_op) != SImode)
13555 return NULL_RTX;
13557 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13558 if (s.set_src == NULL_RTX)
13559 return NULL_RTX;
13561 if (t_reg_operand (s.set_src, VOIDmode)
13562 || negt_reg_operand (s.set_src, VOIDmode))
13563 return extended_op;
13565 /* If the zero extended reg was formed by a logical operation, check the
13566 operands of the logical operation. If both originated from T bit
13567 stores the zero extension can be eliminated. */
13568 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13569 return extended_op;
13571 return NULL_RTX;
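/* Illustration: if the reg being extended was set from the T bit (and thus
   holds 0 or 1), the sign/zero extension is redundant and the extend insn
   can be replaced by a plain reg-reg copy.  */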
13574 #include "gt-sh.h"