Merge branches/gcc-4_8-branch rev 216856
[official-gcc.git] / gcc-4_8-branch / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2013 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 /* FIXME: This is a temporary hack, so that we can include <algorithm>
23 below. <algorithm> will try to include <cstdlib> which will reference
24 malloc & co, which are poisoned by "system.h". The proper solution is
25 to include <cstdlib> in "system.h" instead of <stdlib.h>. */
26 #include <cstdlib>
27 #include <sstream>
28 #include <vector>
29 #include <algorithm>
31 #include "config.h"
32 #include "system.h"
33 #include "coretypes.h"
34 #include "tm.h"
35 #include "insn-config.h"
36 #include "rtl.h"
37 #include "tree.h"
38 #include "flags.h"
39 #include "expr.h"
40 #include "optabs.h"
41 #include "reload.h"
42 #include "function.h"
43 #include "regs.h"
44 #include "hard-reg-set.h"
45 #include "output.h"
46 #include "insn-attr.h"
47 #include "diagnostic-core.h"
48 #include "recog.h"
49 #include "dwarf2.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "langhooks.h"
54 #include "basic-block.h"
55 #include "df.h"
56 #include "intl.h"
57 #include "sched-int.h"
58 #include "params.h"
59 #include "ggc.h"
60 #include "gimple.h"
61 #include "cfgloop.h"
62 #include "alloc-pool.h"
63 #include "tm-constrs.h"
64 #include "opts.h"
66 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
68 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
69 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
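/* For instance, on a little-endian target the least significant word of a
   DImode value is word 0 (LSW == 0) and the most significant word is word 1;
   on a big-endian target the two indices are swapped. */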
71 /* These are some macros to abstract register modes. */
72 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
73 && ((HOST_WIDE_INT)(VALUE)) <= 511)
75 #define CONST_OK_FOR_ADD(size) \
76 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
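/* As a quick illustration of the ranges involved: CONST_OK_FOR_I10 (511)
   and CONST_OK_FOR_I10 (-512) hold, while CONST_OK_FOR_I10 (512) does not.
   CONST_OK_FOR_ADD therefore accepts 10 bit signed displacements on SHmedia
   and falls back to the 8 bit CONST_OK_FOR_I08 range elsewhere. */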
77 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
78 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
79 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
81 /* Used to simplify the logic below. Find the attributes wherever
82 they may be. */
83 #define SH_ATTRIBUTES(decl) \
84 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
85 : DECL_ATTRIBUTES (decl) \
86 ? (DECL_ATTRIBUTES (decl)) \
87 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
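/* For example, SH_ATTRIBUTES (fndecl) for a FUNCTION_DECL returns its
   DECL_ATTRIBUTES when present and otherwise falls back to the attributes
   of the decl's type; for a type node it returns TYPE_ATTRIBUTES directly. */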
89 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
90 int current_function_interrupt;
92 tree sh_deferred_function_attributes;
93 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
95 /* Global variables for machine-dependent things. */
97 /* Which CPU we are scheduling for. */
98 enum processor_type sh_cpu;
100 /* Definitions used in ready queue reordering for first scheduling pass. */
102 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
103 static short *regmode_weight[2];
105 /* Total SFmode and SImode weights of scheduled insns. */
106 static int curr_regmode_pressure[2];
108 /* Number of r0 life regions. */
109 static int r0_life_regions;
111 /* If true, skip cycles for Q -> R movement. */
112 static int skip_cycles = 0;
114 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
115 and returned from sh_reorder2. */
116 static short cached_can_issue_more;
118 /* Unique number for UNSPEC_BBR pattern. */
119 static unsigned int unspec_bbr_uid = 1;
121 /* Provides the class number of the smallest class containing
122 each reg number. */
123 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
125 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
158 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
159 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
160 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
161 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
162 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
163 GENERAL_REGS, GENERAL_REGS,
166 char sh_register_names[FIRST_PSEUDO_REGISTER] \
167 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
169 char sh_additional_register_names[ADDREGNAMES_SIZE] \
170 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
171 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
173 int assembler_dialect;
175 static bool shmedia_space_reserved_for_target_registers;
177 static void split_branches (rtx);
178 static int branch_dest (rtx);
179 static void force_into (rtx, rtx);
180 static void print_slot (rtx);
181 static rtx add_constant (rtx, enum machine_mode, rtx);
182 static void dump_table (rtx, rtx);
183 static bool broken_move (rtx);
184 static bool mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static bool noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void sh_option_override (void);
190 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
191 static rtx frame_insn (rtx);
192 static rtx push (int);
193 static void pop (int);
194 static void push_regs (HARD_REG_SET *, int);
195 static int calc_live_regs (HARD_REG_SET *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static bool sh_frame_pointer_required (void);
198 static rtx mark_constant_pool_use (rtx);
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
200 int, bool *);
201 static tree sh_handle_resbank_handler_attribute (tree *, tree,
202 tree, int, bool *);
203 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
204 tree, int, bool *);
205 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
206 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
207 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
208 static void sh_print_operand (FILE *, rtx, int);
209 static void sh_print_operand_address (FILE *, rtx);
210 static bool sh_print_operand_punct_valid_p (unsigned char code);
211 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
212 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
213 static void sh_insert_attributes (tree, tree *);
214 static const char *sh_check_pch_target_flags (int);
215 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
216 static int sh_adjust_cost (rtx, rtx, rtx, int);
217 static int sh_issue_rate (void);
218 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
219 static short find_set_regmode_weight (rtx, enum machine_mode);
220 static short find_insn_regmode_weight (rtx, enum machine_mode);
221 static void find_regmode_weight (basic_block, enum machine_mode);
222 static int find_r0_life_regions (basic_block);
223 static void sh_md_init_global (FILE *, int, int);
224 static void sh_md_finish_global (FILE *, int);
225 static int rank_for_reorder (const void *, const void *);
226 static void swap_reorder (rtx *, int);
227 static void ready_reorder (rtx *, int);
228 static bool high_pressure (enum machine_mode);
229 static int sh_reorder (FILE *, int, rtx *, int *, int);
230 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
231 static void sh_md_init (FILE *, int, int);
232 static int sh_variable_issue (FILE *, int, rtx, int);
234 static bool sh_function_ok_for_sibcall (tree, tree);
236 static bool sh_cannot_modify_jumps_p (void);
237 static reg_class_t sh_target_reg_class (void);
238 static bool sh_optimize_target_register_callee_saved (bool);
239 static bool sh_ms_bitfield_layout_p (const_tree);
241 static void sh_init_builtins (void);
242 static tree sh_builtin_decl (unsigned, bool);
243 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
244 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
245 HOST_WIDE_INT, tree);
246 static void sh_file_start (void);
247 static bool flow_dependent_p (rtx, rtx);
248 static void flow_dependent_p_1 (rtx, const_rtx, void *);
249 static int shiftcosts (rtx);
250 static int and_xor_ior_costs (rtx, int);
251 static int addsubcosts (rtx);
252 static int multcosts (rtx);
253 static bool unspec_caller_rtx_p (rtx);
254 static bool sh_cannot_copy_insn_p (rtx);
255 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
256 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
257 static int sh_pr_n_sets (void);
258 static rtx sh_allocate_initial_value (rtx);
259 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
260 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
261 enum machine_mode,
262 struct secondary_reload_info *);
263 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
264 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
265 static rtx sh_delegitimize_address (rtx);
266 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
267 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
268 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
269 static int scavenge_reg (HARD_REG_SET *s);
270 struct save_schedule_s;
271 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
272 struct save_schedule_s *, int);
274 static rtx sh_struct_value_rtx (tree, int);
275 static rtx sh_function_value (const_tree, const_tree, bool);
276 static bool sh_function_value_regno_p (const unsigned int);
277 static rtx sh_libcall_value (enum machine_mode, const_rtx);
278 static bool sh_return_in_memory (const_tree, const_tree);
279 static rtx sh_builtin_saveregs (void);
280 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
281 tree, int *, int);
282 static bool sh_strict_argument_naming (cumulative_args_t);
283 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
284 static tree sh_build_builtin_va_list (void);
285 static void sh_va_start (tree, rtx);
286 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
287 static bool sh_promote_prototypes (const_tree);
288 static enum machine_mode sh_promote_function_mode (const_tree type,
289 enum machine_mode,
290 int *punsignedp,
291 const_tree funtype,
292 int for_return);
293 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
294 const_tree, bool);
295 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
296 const_tree, bool);
297 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
298 tree, bool);
299 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
300 const_tree, bool);
301 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
302 const_tree, bool);
303 static bool sh_scalar_mode_supported_p (enum machine_mode);
304 static int sh_dwarf_calling_convention (const_tree);
305 static void sh_encode_section_info (tree, rtx, int);
306 static bool sh2a_function_vector_p (tree);
307 static void sh_trampoline_init (rtx, tree, rtx);
308 static rtx sh_trampoline_adjust_address (rtx);
309 static void sh_conditional_register_usage (void);
310 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
311 static int mov_insn_size (enum machine_mode, bool);
312 static int mov_insn_alignment_mask (enum machine_mode, bool);
313 static bool sequence_insn_p (rtx);
314 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
315 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
316 enum machine_mode, bool);
318 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
320 static const struct attribute_spec sh_attribute_table[] =
322 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
323 affects_type_identity } */
324 { "interrupt_handler", 0, 0, true, false, false,
325 sh_handle_interrupt_handler_attribute, false },
326 { "sp_switch", 1, 1, true, false, false,
327 sh_handle_sp_switch_attribute, false },
328 { "trap_exit", 1, 1, true, false, false,
329 sh_handle_trap_exit_attribute, false },
330 { "renesas", 0, 0, false, true, false,
331 sh_handle_renesas_attribute, false },
332 { "trapa_handler", 0, 0, true, false, false,
333 sh_handle_interrupt_handler_attribute, false },
334 { "nosave_low_regs", 0, 0, true, false, false,
335 sh_handle_interrupt_handler_attribute, false },
336 { "resbank", 0, 0, true, false, false,
337 sh_handle_resbank_handler_attribute, false },
338 { "function_vector", 1, 1, true, false, false,
339 sh2a_handle_function_vector_handler_attribute, false },
340 { NULL, 0, 0, false, false, false, NULL, false }
343 /* Initialize the GCC target structure. */
344 #undef TARGET_ATTRIBUTE_TABLE
345 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
347 /* The next two are used for debug info when compiling with -gdwarf. */
348 #undef TARGET_ASM_UNALIGNED_HI_OP
349 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
350 #undef TARGET_ASM_UNALIGNED_SI_OP
351 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
353 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
354 #undef TARGET_ASM_UNALIGNED_DI_OP
355 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
356 #undef TARGET_ASM_ALIGNED_DI_OP
357 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
359 #undef TARGET_OPTION_OVERRIDE
360 #define TARGET_OPTION_OVERRIDE sh_option_override
362 #undef TARGET_PRINT_OPERAND
363 #define TARGET_PRINT_OPERAND sh_print_operand
364 #undef TARGET_PRINT_OPERAND_ADDRESS
365 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
366 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
367 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
368 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
369 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
371 #undef TARGET_ASM_FUNCTION_EPILOGUE
372 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
374 #undef TARGET_ASM_OUTPUT_MI_THUNK
375 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
377 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
378 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
379 hook_bool_const_tree_hwi_hwi_const_tree_true
381 #undef TARGET_ASM_FILE_START
382 #define TARGET_ASM_FILE_START sh_file_start
383 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
384 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
386 #undef TARGET_REGISTER_MOVE_COST
387 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
389 #undef TARGET_INSERT_ATTRIBUTES
390 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
392 #undef TARGET_SCHED_ADJUST_COST
393 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
395 #undef TARGET_SCHED_ISSUE_RATE
396 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
398 /* The next 7 hooks have been implemented to re-enable sched1. With the
399 help of these macros we limit the movement of insns in sched1 to
400 reduce register pressure. The overall idea is to keep a count of the SImode
401 and SFmode regs required by already scheduled insns. When these counts
402 cross certain threshold values, priority is given to insns that free registers.
403 The insn that frees registers is most likely the insn with the lowest
404 LUID (original insn order); but such an insn might be sitting in the stalled
405 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
406 up to a maximum of 8 so that such insns may move from Q -> R.
408 The hooks are described below:
410 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
411 scheduler; it is called inside the sched_init function just after the
412 find_insn_reg_weights function call. It is used to calculate the SImode
413 and SFmode weights of the insns of basic blocks, much like what
414 find_insn_reg_weights does.
415 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
417 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
418 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
419 (Q)->(R).
421 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
422 high, reorder the ready queue so that the insn with the lowest LUID will
423 be issued next.
425 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
426 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
428 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
429 can be returned from TARGET_SCHED_REORDER2.
431 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
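/* As a rough sketch (illustrative only, not the exact code found later in
   this file), the reorder hooks amount to something like:

     if (CURR_REGMODE_PRESSURE (SImode) > <SImode threshold>
         || CURR_REGMODE_PRESSURE (SFmode) > <SFmode threshold>)
       sort the ready queue so that insns with the lowest LUID come first;

   with CURR_REGMODE_PRESSURE defined further below and the thresholds
   chosen per mode. */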
433 #undef TARGET_SCHED_DFA_NEW_CYCLE
434 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
436 #undef TARGET_SCHED_INIT_GLOBAL
437 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
439 #undef TARGET_SCHED_FINISH_GLOBAL
440 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
442 #undef TARGET_SCHED_VARIABLE_ISSUE
443 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
445 #undef TARGET_SCHED_REORDER
446 #define TARGET_SCHED_REORDER sh_reorder
448 #undef TARGET_SCHED_REORDER2
449 #define TARGET_SCHED_REORDER2 sh_reorder2
451 #undef TARGET_SCHED_INIT
452 #define TARGET_SCHED_INIT sh_md_init
454 #undef TARGET_DELEGITIMIZE_ADDRESS
455 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
457 #undef TARGET_LEGITIMIZE_ADDRESS
458 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
460 #undef TARGET_CANNOT_MODIFY_JUMPS_P
461 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
462 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
463 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
464 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
465 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
466 sh_optimize_target_register_callee_saved
468 #undef TARGET_MS_BITFIELD_LAYOUT_P
469 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
471 #undef TARGET_INIT_BUILTINS
472 #define TARGET_INIT_BUILTINS sh_init_builtins
473 #undef TARGET_BUILTIN_DECL
474 #define TARGET_BUILTIN_DECL sh_builtin_decl
475 #undef TARGET_EXPAND_BUILTIN
476 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
478 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
479 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
481 #undef TARGET_CANNOT_COPY_INSN_P
482 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
483 #undef TARGET_RTX_COSTS
484 #define TARGET_RTX_COSTS sh_rtx_costs
485 #undef TARGET_ADDRESS_COST
486 #define TARGET_ADDRESS_COST sh_address_cost
487 #undef TARGET_ALLOCATE_INITIAL_VALUE
488 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
490 #undef TARGET_MACHINE_DEPENDENT_REORG
491 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
493 #undef TARGET_DWARF_REGISTER_SPAN
494 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
496 #ifdef HAVE_AS_TLS
497 #undef TARGET_HAVE_TLS
498 #define TARGET_HAVE_TLS true
499 #endif
501 #undef TARGET_PROMOTE_PROTOTYPES
502 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
503 #undef TARGET_PROMOTE_FUNCTION_MODE
504 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
506 #undef TARGET_FUNCTION_VALUE
507 #define TARGET_FUNCTION_VALUE sh_function_value
508 #undef TARGET_FUNCTION_VALUE_REGNO_P
509 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
510 #undef TARGET_LIBCALL_VALUE
511 #define TARGET_LIBCALL_VALUE sh_libcall_value
512 #undef TARGET_STRUCT_VALUE_RTX
513 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
514 #undef TARGET_RETURN_IN_MEMORY
515 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
517 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
518 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
519 #undef TARGET_SETUP_INCOMING_VARARGS
520 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
521 #undef TARGET_STRICT_ARGUMENT_NAMING
522 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
523 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
524 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
525 #undef TARGET_MUST_PASS_IN_STACK
526 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
527 #undef TARGET_PASS_BY_REFERENCE
528 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
529 #undef TARGET_CALLEE_COPIES
530 #define TARGET_CALLEE_COPIES sh_callee_copies
531 #undef TARGET_ARG_PARTIAL_BYTES
532 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
533 #undef TARGET_FUNCTION_ARG
534 #define TARGET_FUNCTION_ARG sh_function_arg
535 #undef TARGET_FUNCTION_ARG_ADVANCE
536 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
538 #undef TARGET_BUILD_BUILTIN_VA_LIST
539 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
540 #undef TARGET_EXPAND_BUILTIN_VA_START
541 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
542 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
543 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
545 #undef TARGET_SCALAR_MODE_SUPPORTED_P
546 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
547 #undef TARGET_VECTOR_MODE_SUPPORTED_P
548 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
550 #undef TARGET_CHECK_PCH_TARGET_FLAGS
551 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
553 #undef TARGET_DWARF_CALLING_CONVENTION
554 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
556 #undef TARGET_FRAME_POINTER_REQUIRED
557 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
559 /* Return regmode weight for insn. */
560 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
561 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
563 /* Return current register pressure for regmode. */
564 #define CURR_REGMODE_PRESSURE(MODE)\
565 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
567 #undef TARGET_ENCODE_SECTION_INFO
568 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
570 #undef TARGET_SECONDARY_RELOAD
571 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
573 #undef TARGET_PREFERRED_RELOAD_CLASS
574 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
576 #undef TARGET_CONDITIONAL_REGISTER_USAGE
577 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
579 #undef TARGET_LEGITIMATE_ADDRESS_P
580 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
582 #undef TARGET_TRAMPOLINE_INIT
583 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
584 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
585 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
587 #undef TARGET_LEGITIMATE_CONSTANT_P
588 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
590 #undef TARGET_CANONICALIZE_COMPARISON
591 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
593 /* Machine-specific symbol_ref flags. */
594 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
596 /* The tas.b instruction sets bit 7 in the byte, i.e. the value 0x80. This
597 value is used by the optabs.c atomic op expansion code as well as in sync.md. */
598 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
599 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
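/* Roughly, "tas.b @Rn" reads the byte at Rn, sets the T bit if that byte
   was zero, and writes the byte back with bit 7 (0x80) set, which is why
   0x80 is the value declared above. */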
601 struct gcc_target targetm = TARGET_INITIALIZER;
604 /* Information on the currently selected atomic model.
605 This is initialized in sh_option_override. */
606 static sh_atomic_model selected_atomic_model_;
608 const sh_atomic_model&
609 selected_atomic_model (void)
611 return selected_atomic_model_;
614 static sh_atomic_model
615 parse_validate_atomic_model_option (const char* str)
617 const char* model_names[sh_atomic_model::num_models];
618 model_names[sh_atomic_model::none] = "none";
619 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
620 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
621 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
622 model_names[sh_atomic_model::soft_imask] = "soft-imask";
624 const char* model_cdef_names[sh_atomic_model::num_models];
625 model_cdef_names[sh_atomic_model::none] = "NONE";
626 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
627 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
628 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
629 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
631 sh_atomic_model ret;
632 ret.type = sh_atomic_model::none;
633 ret.name = model_names[sh_atomic_model::none];
634 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
635 ret.strict = false;
636 ret.tcb_gbr_offset = -1;
638 /* Handle empty string as 'none'. */
639 if (str == NULL || *str == '\0')
640 return ret;
642 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
644 std::vector<std::string> tokens;
645 for (std::stringstream ss (str); ss.good (); )
647 tokens.push_back (std::string ());
648 std::getline (ss, tokens.back (), ',');
651 if (tokens.empty ())
652 err_ret ("invalid atomic model option");
654 /* The first token must be the atomic model name. */
656 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
657 if (tokens.front () == model_names[i])
659 ret.type = (sh_atomic_model::enum_type)i;
660 ret.name = model_names[i];
661 ret.cdef_name = model_cdef_names[i];
662 goto got_mode_name;
665 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
666 got_mode_name:;
669 /* Go through the remaining tokens. */
670 for (size_t i = 1; i < tokens.size (); ++i)
672 if (tokens[i] == "strict")
673 ret.strict = true;
674 else if (tokens[i].find ("gbr-offset=") == 0)
676 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
677 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
678 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
679 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
680 "option", offset_str.c_str ());
682 else
683 err_ret ("unknown parameter \"%s\" in atomic model option",
684 tokens[i].c_str ());
687 /* Check that the selection makes sense. */
688 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
689 err_ret ("atomic operations are not supported on SHmedia");
691 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
692 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
693 ret.name);
695 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
696 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
698 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
699 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
701 if (ret.type == sh_atomic_model::soft_tcb
702 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
703 || (ret.tcb_gbr_offset & 3) != 0))
704 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
705 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
706 ret.name);
708 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
709 err_ret ("cannot use atomic model %s in user mode", ret.name);
711 return ret;
713 #undef err_ret
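/* For illustration, option strings accepted by the parser above (hypothetical
   command-line examples, matching the checks performed there) include:
     -matomic-model=soft-gusa
     -matomic-model=hard-llcs,strict
     -matomic-model=soft-tcb,gbr-offset=32
   where a gbr-offset value must be a multiple of 4 in the range 0-1020. */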
716 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
717 various options, and do some machine dependent initialization. */
718 static void
719 sh_option_override (void)
721 int regno;
723 SUBTARGET_OVERRIDE_OPTIONS;
724 if (optimize > 1 && !optimize_size)
725 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
726 sh_cpu = PROCESSOR_SH1;
727 assembler_dialect = 0;
728 if (TARGET_SH2)
729 sh_cpu = PROCESSOR_SH2;
730 if (TARGET_SH2E)
731 sh_cpu = PROCESSOR_SH2E;
732 if (TARGET_SH2A)
733 sh_cpu = PROCESSOR_SH2A;
734 if (TARGET_SH3)
735 sh_cpu = PROCESSOR_SH3;
736 if (TARGET_SH3E)
737 sh_cpu = PROCESSOR_SH3E;
738 if (TARGET_SH4)
740 assembler_dialect = 1;
741 sh_cpu = PROCESSOR_SH4;
743 if (TARGET_SH4A_ARCH)
745 assembler_dialect = 1;
746 sh_cpu = PROCESSOR_SH4A;
748 if (TARGET_SH5)
750 sh_cpu = PROCESSOR_SH5;
751 target_flags |= MASK_ALIGN_DOUBLE;
752 if (TARGET_SHMEDIA_FPU)
753 target_flags |= MASK_FMOVD;
754 if (TARGET_SHMEDIA)
756 /* There are no delay slots on SHmedia. */
757 flag_delayed_branch = 0;
758 /* Relaxation isn't yet supported for SHmedia. */
759 target_flags &= ~MASK_RELAX;
760 /* After reload, if-conversion does little good but can cause
761 ICEs:
762 - find_if_block doesn't do anything for SH because we don't
763 have conditional execution patterns. (We use conditional
764 move patterns, which are handled differently, and only
765 before reload).
766 - find_cond_trap doesn't do anything for the SH because we
767 don't have conditional traps.
768 - find_if_case_1 uses redirect_edge_and_branch_force in
769 the only path that does an optimization, and this causes
770 an ICE when branch targets are in registers.
771 - find_if_case_2 doesn't do anything for the SHmedia after
772 reload except when it can redirect a tablejump - and
773 that's rather rare. */
774 flag_if_conversion2 = 0;
775 if (! strcmp (sh_div_str, "call"))
776 sh_div_strategy = SH_DIV_CALL;
777 else if (! strcmp (sh_div_str, "call2"))
778 sh_div_strategy = SH_DIV_CALL2;
779 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
780 sh_div_strategy = SH_DIV_FP;
781 else if (! strcmp (sh_div_str, "inv"))
782 sh_div_strategy = SH_DIV_INV;
783 else if (! strcmp (sh_div_str, "inv:minlat"))
784 sh_div_strategy = SH_DIV_INV_MINLAT;
785 else if (! strcmp (sh_div_str, "inv20u"))
786 sh_div_strategy = SH_DIV_INV20U;
787 else if (! strcmp (sh_div_str, "inv20l"))
788 sh_div_strategy = SH_DIV_INV20L;
789 else if (! strcmp (sh_div_str, "inv:call2"))
790 sh_div_strategy = SH_DIV_INV_CALL2;
791 else if (! strcmp (sh_div_str, "inv:call"))
792 sh_div_strategy = SH_DIV_INV_CALL;
793 else if (! strcmp (sh_div_str, "inv:fp"))
795 if (TARGET_FPU_ANY)
796 sh_div_strategy = SH_DIV_INV_FP;
797 else
798 sh_div_strategy = SH_DIV_INV;
800 TARGET_CBRANCHDI4 = 0;
801 /* Assembler CFI isn't yet fully supported for SHmedia. */
802 flag_dwarf2_cfi_asm = 0;
805 else
807 /* Only the sh64-elf assembler fully supports .quad properly. */
808 targetm.asm_out.aligned_op.di = NULL;
809 targetm.asm_out.unaligned_op.di = NULL;
812 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
813 Disable it for everything else. */
814 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
815 TARGET_USERMODE = false;
817 if (TARGET_SH1)
819 if (! strcmp (sh_div_str, "call-div1"))
820 sh_div_strategy = SH_DIV_CALL_DIV1;
821 else if (! strcmp (sh_div_str, "call-fp")
822 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
823 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
824 sh_div_strategy = SH_DIV_CALL_FP;
825 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
826 sh_div_strategy = SH_DIV_CALL_TABLE;
827 else
828 /* Pick one that makes the most sense for the target in general.
829 There is little point in using different functions depending
830 on -Os, since then we would end up with two different functions
831 when some of the code is compiled for size and some for
832 speed. */
834 /* SH4 tends to emphasize speed. */
835 if (TARGET_HARD_SH4)
836 sh_div_strategy = SH_DIV_CALL_TABLE;
837 /* These have their own way of doing things. */
838 else if (TARGET_SH2A)
839 sh_div_strategy = SH_DIV_INTRINSIC;
840 /* ??? Should we use the integer SHmedia function instead? */
841 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
842 sh_div_strategy = SH_DIV_CALL_FP;
843 /* SH1 .. SH3 cores often go into small-footprint systems, so
844 default to the smallest implementation available. */
845 else
846 sh_div_strategy = SH_DIV_CALL_DIV1;
848 if (!TARGET_SH1)
849 TARGET_PRETEND_CMOVE = 0;
850 if (sh_divsi3_libfunc[0])
851 ; /* User supplied - leave it alone. */
852 else if (TARGET_DIVIDE_CALL_FP)
853 sh_divsi3_libfunc = "__sdivsi3_i4";
854 else if (TARGET_DIVIDE_CALL_TABLE)
855 sh_divsi3_libfunc = "__sdivsi3_i4i";
856 else if (TARGET_SH5)
857 sh_divsi3_libfunc = "__sdivsi3_1";
858 else
859 sh_divsi3_libfunc = "__sdivsi3";
860 if (sh_branch_cost == -1)
862 sh_branch_cost = 1;
864 /* The SH1 does not have delay slots, hence we get a pipeline stall
865 at every branch. The SH4 is superscalar, so the single delay slot
866 is not sufficient to keep both pipelines filled. */
867 if (! TARGET_SH2 || TARGET_HARD_SH4)
868 sh_branch_cost = 2;
871 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
872 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
873 TARGET_ZDCBRANCH = 1;
875 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
876 if (! VALID_REGISTER_P (regno))
877 sh_register_names[regno][0] = '\0';
879 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
880 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
881 sh_additional_register_names[regno][0] = '\0';
883 if ((flag_pic && ! TARGET_PREFERGOT)
884 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
885 flag_no_function_cse = 1;
887 if (targetm.small_register_classes_for_mode_p (VOIDmode))
889 /* Never run scheduling before reload, since that can
890 break global alloc, and generates slower code anyway due
891 to the pressure on R0. */
892 /* Enable sched1 for SH4 if the user explicitly requests it.
893 When sched1 is enabled, the ready queue will be reordered by
894 the target hooks if pressure is high. We cannot do this for
895 PIC, SH3 and lower, as they give spill failures for R0.
896 if (!TARGET_HARD_SH4 || flag_pic)
897 flag_schedule_insns = 0;
898 /* ??? Current exception handling places basic block boundaries
899 after call_insns. This causes high pressure on R0 and gives
900 spill failures for R0 in reload. See PR 22553 and the thread
901 on gcc-patches
902 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
903 else if (flag_exceptions)
905 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
906 warning (0, "ignoring -fschedule-insns because of exception "
907 "handling bug");
908 flag_schedule_insns = 0;
910 else if (flag_schedule_insns
911 && !global_options_set.x_flag_schedule_insns)
912 flag_schedule_insns = 0;
915 /* Unwind info is not correct around the CFG unless either a frame
916 pointer is present or M_A_O_A is set. Fixing this requires rewriting
917 unwind info generation to be aware of the CFG and propagating states
918 around edges. */
919 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
920 || flag_exceptions || flag_non_call_exceptions)
921 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
923 warning (0, "unwind tables currently require either a frame pointer "
924 "or -maccumulate-outgoing-args for correctness");
925 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
928 /* Unwinding with -freorder-blocks-and-partition does not work on this
929 architecture, because it requires far jumps to labels crossing between
930 hot/cold sections, which are rejected on this architecture. */
931 if (flag_reorder_blocks_and_partition)
933 if (flag_exceptions)
935 inform (input_location,
936 "-freorder-blocks-and-partition does not work with "
937 "exceptions on this architecture");
938 flag_reorder_blocks_and_partition = 0;
939 flag_reorder_blocks = 1;
941 else if (flag_unwind_tables)
943 inform (input_location,
944 "-freorder-blocks-and-partition does not support unwind "
945 "info on this architecture");
946 flag_reorder_blocks_and_partition = 0;
947 flag_reorder_blocks = 1;
951 /* Adjust loop, jump and function alignment values (in bytes), if those
952 were not specified by the user using -falign-loops, -falign-jumps
953 and -falign-functions options.
954 32 bit alignment is better for speed, because instructions can be
955 fetched as a pair from a longword boundary. For size use 16 bit
956 alignment to get more compact code.
957 Aligning all jumps increases the code size, even if it might
958 result in slightly faster code. Thus, it is set to the smallest
959 alignment possible if not specified by the user. */
960 if (align_loops == 0)
962 if (TARGET_SH5)
963 align_loops = 8;
964 else
965 align_loops = optimize_size ? 2 : 4;
968 if (align_jumps == 0)
970 if (TARGET_SHMEDIA)
971 align_jumps = 1 << CACHE_LOG;
972 else
973 align_jumps = 2;
975 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
976 align_jumps = TARGET_SHMEDIA ? 4 : 2;
978 if (align_functions == 0)
980 if (TARGET_SHMEDIA)
981 align_functions = optimize_size
982 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
983 else
984 align_functions = optimize_size ? 2 : 4;
987 /* The linker relaxation code breaks when a function contains
988 alignments that are larger than that at the start of a
989 compilation unit. */
990 if (TARGET_RELAX)
992 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
994 /* Also take possible .long constants / mova tables into account. */
995 if (min_align < 4)
996 min_align = 4;
997 if (align_functions < min_align)
998 align_functions = min_align;
1001 if (flag_unsafe_math_optimizations)
1003 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1004 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1005 TARGET_FSCA = 1;
1007 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1008 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1009 TARGET_FSRRA = 1;
1012 /* Allow the fsrra insn only if -funsafe-math-optimizations and
1013 -ffinite-math-only are enabled. */
1014 TARGET_FSRRA = TARGET_FSRRA
1015 && flag_unsafe_math_optimizations
1016 && flag_finite_math_only;
1018 /* If the -mieee option was not explicitly set by the user, turn it on
1019 unless -ffinite-math-only was specified. See also PR 33135. */
1020 if (! global_options_set.x_TARGET_IEEE)
1021 TARGET_IEEE = ! flag_finite_math_only;
1023 if (sh_fixed_range_str)
1024 sh_fix_range (sh_fixed_range_str);
1026 /* This target defaults to strict volatile bitfields. */
1027 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1028 flag_strict_volatile_bitfields = 1;
1030 /* Parse atomic model option and make sure it is valid for the current
1031 target CPU. */
1032 selected_atomic_model_
1033 = parse_validate_atomic_model_option (sh_atomic_model_str);
1036 /* Print the operand address in x to the stream. */
1037 static void
1038 sh_print_operand_address (FILE *stream, rtx x)
1040 switch (GET_CODE (x))
1042 case REG:
1043 case SUBREG:
1044 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1045 break;
1047 case PLUS:
1049 rtx base = XEXP (x, 0);
1050 rtx index = XEXP (x, 1);
1052 switch (GET_CODE (index))
1054 case CONST_INT:
1055 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1056 reg_names[true_regnum (base)]);
1057 break;
1059 case REG:
1060 case SUBREG:
1062 int base_num = true_regnum (base);
1063 int index_num = true_regnum (index);
1065 fprintf (stream, "@(r0,%s)",
1066 reg_names[MAX (base_num, index_num)]);
1067 break;
1070 default:
1071 gcc_unreachable ();
1074 break;
1076 case PRE_DEC:
1077 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1078 break;
1080 case POST_INC:
1081 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1082 break;
1084 default:
1085 x = mark_constant_pool_use (x);
1086 output_addr_const (stream, x);
1087 break;
1091 /* Print operand x (an rtx) in assembler syntax to file stream
1092 according to modifier code.
1094 '.' print a .s if insn needs delay slot
1095 ',' print LOCAL_LABEL_PREFIX
1096 '@' print trap, rte or rts depending upon pragma interruptness
1097 '#' output a nop if there is nothing to put in the delay slot
1098 ''' print likelihood suffix (/u for unlikely).
1099 '>' print branch target if -fverbose-asm
1100 'O' print a constant without the #
1101 'R' print the LSW of a dp value - changes if in little endian
1102 'S' print the MSW of a dp value - changes if in little endian
1103 'T' print the next word of a dp value - same as 'R' in big endian mode.
1104 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1105 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1106 'N' print 'r63' if the operand is (const_int 0).
1107 'd' print a V2SF reg as dN instead of fpN.
1108 'm' print a pair `base,offset' or `base,index', for LD and ST.
1109 'U' Likewise for {LD,ST}{HI,LO}.
1110 'V' print the position of a single bit set.
1111 'W' print the position of a single bit cleared.
1112 't' print a memory address which is a register.
1113 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1114 'o' output an operator. */
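/* Illustrative example: on a little-endian target with a DImode value in a
   general register pair, '%R' prints the register holding the least
   significant word and '%S' the register holding the most significant word;
   for a MEM operand the address is instead adjusted by 4*LSW or 4*MSW. */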
1115 static void
1116 sh_print_operand (FILE *stream, rtx x, int code)
1118 int regno;
1119 enum machine_mode mode;
1121 switch (code)
1123 tree trapa_attr;
1125 case '.':
1126 if (final_sequence
1127 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1128 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1129 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1130 break;
1131 case ',':
1132 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1133 break;
1134 case '@':
1135 trapa_attr = lookup_attribute ("trap_exit",
1136 DECL_ATTRIBUTES (current_function_decl));
1137 if (trapa_attr)
1138 fprintf (stream, "trapa #%ld",
1139 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1140 else if (sh_cfun_interrupt_handler_p ())
1142 if (sh_cfun_resbank_handler_p ())
1143 fprintf (stream, "resbank\n");
1144 fprintf (stream, "rte");
1146 else
1147 fprintf (stream, "rts");
1148 break;
1149 case '#':
1150 /* Output a nop if there's nothing in the delay slot. */
1151 if (dbr_sequence_length () == 0)
1152 fprintf (stream, "\n\tnop");
1153 break;
1154 case '\'':
1156 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1158 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
1159 fputs ("/u", stream);
1160 break;
1162 case '>':
1163 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1165 fputs ("\t! target: ", stream);
1166 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1168 break;
1169 case 'O':
1170 x = mark_constant_pool_use (x);
1171 output_addr_const (stream, x);
1172 break;
1173 /* N.B.: %R / %S / %T adjust memory addresses by four.
1174 For SHMEDIA, that means they can be used to access the first and
1175 second 32 bit part of a 64 bit (or larger) value that
1176 might be held in floating point registers or memory.
1177 While they can be used to access the 64 bit parts of a larger value
1178 held in general purpose registers, that won't work with memory, nor
1179 with fp registers, since the frxx names are used. */
1180 case 'R':
1181 if (REG_P (x) || GET_CODE (x) == SUBREG)
1183 regno = true_regnum (x);
1184 regno += FP_REGISTER_P (regno) ? 1 : LSW;
1185 fputs (reg_names[regno], (stream));
1187 else if (MEM_P (x))
1189 x = adjust_address (x, SImode, 4 * LSW);
1190 sh_print_operand_address (stream, XEXP (x, 0));
1192 else
1194 rtx sub = NULL_RTX;
1196 mode = GET_MODE (x);
1197 if (mode == VOIDmode)
1198 mode = DImode;
1199 if (GET_MODE_SIZE (mode) >= 8)
1200 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
1201 if (sub)
1202 sh_print_operand (stream, sub, 0);
1203 else
1204 output_operand_lossage ("invalid operand to %%R");
1206 break;
1207 case 'S':
1208 if (REG_P (x) || GET_CODE (x) == SUBREG)
1210 regno = true_regnum (x);
1211 regno += FP_REGISTER_P (regno) ? 0 : MSW;
1212 fputs (reg_names[regno], (stream));
1214 else if (MEM_P (x))
1216 x = adjust_address (x, SImode, 4 * MSW);
1217 sh_print_operand_address (stream, XEXP (x, 0));
1219 else
1221 rtx sub = NULL_RTX;
1223 mode = GET_MODE (x);
1224 if (mode == VOIDmode)
1225 mode = DImode;
1226 if (GET_MODE_SIZE (mode) >= 8)
1227 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
1228 if (sub)
1229 sh_print_operand (stream, sub, 0);
1230 else
1231 output_operand_lossage ("invalid operand to %%S");
1233 break;
1234 case 'T':
1235 /* Next word of a double. */
1236 switch (GET_CODE (x))
1238 case REG:
1239 fputs (reg_names[REGNO (x) + 1], (stream));
1240 break;
1241 case MEM:
1242 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1243 && GET_CODE (XEXP (x, 0)) != POST_INC)
1244 x = adjust_address (x, SImode, 4);
1245 sh_print_operand_address (stream, XEXP (x, 0));
1246 break;
1247 default:
1248 break;
1250 break;
1252 case 't':
1253 gcc_assert (MEM_P (x));
1254 x = XEXP (x, 0);
1255 switch (GET_CODE (x))
1257 case REG:
1258 case SUBREG:
1259 sh_print_operand (stream, x, 0);
1260 break;
1261 default:
1262 break;
1264 break;
1266 case 'o':
1267 switch (GET_CODE (x))
1269 case PLUS: fputs ("add", stream); break;
1270 case MINUS: fputs ("sub", stream); break;
1271 case MULT: fputs ("mul", stream); break;
1272 case DIV: fputs ("div", stream); break;
1273 case EQ: fputs ("eq", stream); break;
1274 case NE: fputs ("ne", stream); break;
1275 case GT: case LT: fputs ("gt", stream); break;
1276 case GE: case LE: fputs ("ge", stream); break;
1277 case GTU: case LTU: fputs ("gtu", stream); break;
1278 case GEU: case LEU: fputs ("geu", stream); break;
1279 default:
1280 break;
1282 break;
1283 case 'M':
1284 if (TARGET_SHMEDIA)
1286 if (MEM_P (x)
1287 && GET_CODE (XEXP (x, 0)) == PLUS
1288 && (REG_P (XEXP (XEXP (x, 0), 1))
1289 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1290 fputc ('x', stream);
1292 else
1294 if (MEM_P (x))
1296 switch (GET_MODE (x))
1298 case QImode: fputs (".b", stream); break;
1299 case HImode: fputs (".w", stream); break;
1300 case SImode: fputs (".l", stream); break;
1301 case SFmode: fputs (".s", stream); break;
1302 case DFmode: fputs (".d", stream); break;
1303 default: gcc_unreachable ();
1307 break;
1309 case 'm':
1310 gcc_assert (MEM_P (x));
1311 x = XEXP (x, 0);
1312 /* Fall through. */
1313 case 'U':
1314 switch (GET_CODE (x))
1316 case REG:
1317 case SUBREG:
1318 sh_print_operand (stream, x, 0);
1319 fputs (", 0", stream);
1320 break;
1322 case PLUS:
1323 sh_print_operand (stream, XEXP (x, 0), 0);
1324 fputs (", ", stream);
1325 sh_print_operand (stream, XEXP (x, 1), 0);
1326 break;
1328 default:
1329 gcc_unreachable ();
1331 break;
1333 case 'V':
1335 int num = exact_log2 (INTVAL (x));
1336 gcc_assert (num >= 0);
1337 fprintf (stream, "#%d", num);
1339 break;
1341 case 'W':
1343 int num = exact_log2 (~INTVAL (x));
1344 gcc_assert (num >= 0);
1345 fprintf (stream, "#%d", num);
1347 break;
1349 case 'd':
1350 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1352 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1353 break;
1355 case 'N':
1356 if (x == CONST0_RTX (GET_MODE (x)))
1358 fprintf ((stream), "r63");
1359 break;
1361 goto default_output;
1362 case 'u':
1363 if (CONST_INT_P (x))
1365 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1366 break;
1368 /* Fall through. */
1370 default_output:
1371 default:
1372 regno = 0;
1373 mode = GET_MODE (x);
1375 switch (GET_CODE (x))
1377 case TRUNCATE:
1379 rtx inner = XEXP (x, 0);
1380 int offset = 0;
1381 enum machine_mode inner_mode;
1383 /* We might see SUBREGs with vector mode registers inside. */
1384 if (GET_CODE (inner) == SUBREG
1385 && (GET_MODE_SIZE (GET_MODE (inner))
1386 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1387 && subreg_lowpart_p (inner))
1388 inner = SUBREG_REG (inner);
1389 if (CONST_INT_P (inner))
1391 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1392 goto default_output;
1394 inner_mode = GET_MODE (inner);
1395 if (GET_CODE (inner) == SUBREG
1396 && (GET_MODE_SIZE (GET_MODE (inner))
1397 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1398 && REG_P (SUBREG_REG (inner)))
1400 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1401 GET_MODE (SUBREG_REG (inner)),
1402 SUBREG_BYTE (inner),
1403 GET_MODE (inner));
1404 inner = SUBREG_REG (inner);
1406 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1407 abort ();
1408 /* Floating point register pairs are always big endian;
1409 general purpose registers are 64 bit wide. */
1410 regno = REGNO (inner);
1411 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1412 - HARD_REGNO_NREGS (regno, mode))
1413 + offset;
1414 x = inner;
1415 goto reg;
1417 case SIGN_EXTEND:
1418 x = XEXP (x, 0);
1419 goto reg;
1420 /* FIXME: We need this on SHmedia32 because reload generates
1421 some sign-extended HI or QI loads into DImode registers
1422 but, because Pmode is SImode, the address ends up with a
1423 subreg:SI of the DImode register. Maybe reload should be
1424 fixed so as to apply alter_subreg to such loads? */
1425 case IF_THEN_ELSE:
1426 gcc_assert (trapping_target_operand (x, VOIDmode));
1427 x = XEXP (XEXP (x, 2), 0);
1428 goto default_output;
1429 case SUBREG:
1430 gcc_assert (SUBREG_BYTE (x) == 0
1431 && REG_P (SUBREG_REG (x)));
1433 x = SUBREG_REG (x);
1434 /* Fall through. */
1436 reg:
1437 case REG:
1438 regno += REGNO (x);
1439 if (FP_REGISTER_P (regno)
1440 && mode == V16SFmode)
1441 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1442 else if (FP_REGISTER_P (REGNO (x))
1443 && mode == V4SFmode)
1444 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1445 else if (REG_P (x)
1446 && mode == V2SFmode)
1447 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1448 else if (FP_REGISTER_P (REGNO (x))
1449 && GET_MODE_SIZE (mode) > 4)
1450 fprintf ((stream), "d%s", reg_names[regno] + 1);
1451 else
1452 fputs (reg_names[regno], (stream));
1453 break;
1455 case MEM:
1456 output_address (XEXP (x, 0));
1457 break;
1459 default:
1460 if (TARGET_SH1)
1461 fputc ('#', stream);
1462 output_addr_const (stream, x);
1463 break;
1465 break;
1469 static bool
1470 sh_print_operand_punct_valid_p (unsigned char code)
1472 return (code == '.' || code == '#' || code == '@' || code == ','
1473 || code == '$' || code == '\'' || code == '>');
1476 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1477 static bool
1478 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1480 if (GET_CODE (x) == UNSPEC)
1482 switch (XINT (x, 1))
1484 case UNSPEC_DATALABEL:
1485 fputs ("datalabel ", file);
1486 output_addr_const (file, XVECEXP (x, 0, 0));
1487 break;
1488 case UNSPEC_PIC:
1489 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1490 output_addr_const (file, XVECEXP (x, 0, 0));
1491 break;
1492 case UNSPEC_GOT:
1493 output_addr_const (file, XVECEXP (x, 0, 0));
1494 fputs ("@GOT", file);
1495 break;
1496 case UNSPEC_GOTOFF:
1497 output_addr_const (file, XVECEXP (x, 0, 0));
1498 fputs ("@GOTOFF", file);
1499 break;
1500 case UNSPEC_PLT:
1501 output_addr_const (file, XVECEXP (x, 0, 0));
1502 fputs ("@PLT", file);
1503 break;
1504 case UNSPEC_GOTPLT:
1505 output_addr_const (file, XVECEXP (x, 0, 0));
1506 fputs ("@GOTPLT", file);
1507 break;
1508 case UNSPEC_DTPOFF:
1509 output_addr_const (file, XVECEXP (x, 0, 0));
1510 fputs ("@DTPOFF", file);
1511 break;
1512 case UNSPEC_GOTTPOFF:
1513 output_addr_const (file, XVECEXP (x, 0, 0));
1514 fputs ("@GOTTPOFF", file);
1515 break;
1516 case UNSPEC_TPOFF:
1517 output_addr_const (file, XVECEXP (x, 0, 0));
1518 fputs ("@TPOFF", file);
1519 break;
1520 case UNSPEC_CALLER:
1522 char name[32];
1523 /* LPCS stands for Label for PIC Call Site. */
1524 targetm.asm_out.generate_internal_label (name, "LPCS",
1525 INTVAL (XVECEXP (x, 0, 0)));
1526 assemble_name (file, name);
1528 break;
1529 case UNSPEC_EXTRACT_S16:
1530 case UNSPEC_EXTRACT_U16:
1532 rtx val, shift;
1534 val = XVECEXP (x, 0, 0);
1535 shift = XVECEXP (x, 0, 1);
1536 fputc ('(', file);
1537 if (shift != const0_rtx)
1538 fputc ('(', file);
1539 if (GET_CODE (val) == CONST
1540 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1542 fputc ('(', file);
1543 output_addr_const (file, val);
1544 fputc (')', file);
1546 else
1547 output_addr_const (file, val);
1548 if (shift != const0_rtx)
1550 fputs (" >> ", file);
1551 output_addr_const (file, shift);
1552 fputc (')', file);
1554 fputs (" & 65535)", file);
1556 break;
1557 case UNSPEC_SYMOFF:
1558 output_addr_const (file, XVECEXP (x, 0, 0));
1559 fputc ('-', file);
1560 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1562 fputc ('(', file);
1563 output_addr_const (file, XVECEXP (x, 0, 1));
1564 fputc (')', file);
1566 else
1567 output_addr_const (file, XVECEXP (x, 0, 1));
1568 break;
1569 case UNSPEC_PCREL_SYMOFF:
1570 output_addr_const (file, XVECEXP (x, 0, 0));
1571 fputs ("-(", file);
1572 output_addr_const (file, XVECEXP (x, 0, 1));
1573 fputs ("-.)", file);
1574 break;
1575 default:
1576 return false;
1578 return true;
1580 else
1581 return false;
1584 /* Encode symbol attributes of a SYMBOL_REF into its
1585 SYMBOL_REF_FLAGS. */
1586 static void
1587 sh_encode_section_info (tree decl, rtx rtl, int first)
1589 default_encode_section_info (decl, rtl, first);
1591 if (TREE_CODE (decl) == FUNCTION_DECL
1592 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1593 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1596 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1597 static void
1598 force_into (rtx value, rtx target)
1600 value = force_operand (value, target);
1601 if (! rtx_equal_p (value, target))
1602 emit_insn (gen_move_insn (target, value));
1605 /* Emit code to perform a block move. Choose the best method.
1607 OPERANDS[0] is the destination.
1608 OPERANDS[1] is the source.
1609 OPERANDS[2] is the size.
1610 OPERANDS[3] is the alignment safe to use. */
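/* Depending on the size and alignment of the block, the expansion below
   either copies inline (using movua.l on SH4A for unaligned sources) or
   calls one of the __movmem* library routines referenced in the code. */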
1611 bool
1612 expand_block_move (rtx *operands)
1614 int align = INTVAL (operands[3]);
1615 int constp = (CONST_INT_P (operands[2]));
1616 int bytes = (constp ? INTVAL (operands[2]) : 0);
1618 if (! constp)
1619 return false;
1621 /* If we could use mov.l to move words and dest is word-aligned, we
1622 can use movua.l for loads and still generate a relatively short
1623 and efficient sequence. */
1624 if (TARGET_SH4A_ARCH && align < 4
1625 && MEM_ALIGN (operands[0]) >= 32
1626 && can_move_by_pieces (bytes, 32))
1628 rtx dest = copy_rtx (operands[0]);
1629 rtx src = copy_rtx (operands[1]);
1630 /* We could use different pseudos for each copied word, but
1631 since movua can only load into r0, it's kind of
1632 pointless. */
1633 rtx temp = gen_reg_rtx (SImode);
1634 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1635 int copied = 0;
1637 while (copied + 4 <= bytes)
1639 rtx to = adjust_address (dest, SImode, copied);
1640 rtx from = adjust_automodify_address (src, BLKmode,
1641 src_addr, copied);
1643 set_mem_size (from, 4);
1644 emit_insn (gen_movua (temp, from));
1645 emit_move_insn (src_addr, plus_constant (Pmode, src_addr, 4));
1646 emit_move_insn (to, temp);
1647 copied += 4;
1650 if (copied < bytes)
1651 move_by_pieces (adjust_address (dest, BLKmode, copied),
1652 adjust_automodify_address (src, BLKmode,
1653 src_addr, copied),
1654 bytes - copied, align, 0);
1656 return true;
1659 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1660 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1661 if (align < 4 || (bytes % 4 != 0))
1662 return false;
1664 if (TARGET_HARD_SH4)
1666 if (bytes < 12)
1667 return false;
1668 else if (bytes == 12)
1670 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1671 rtx r4 = gen_rtx_REG (SImode, 4);
1672 rtx r5 = gen_rtx_REG (SImode, 5);
1674 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1675 force_into (XEXP (operands[0], 0), r4);
1676 force_into (XEXP (operands[1], 0), r5);
1677 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1678 return true;
1680 else if (! optimize_size)
1682 const char *entry_name;
1683 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1684 int dwords;
1685 rtx r4 = gen_rtx_REG (SImode, 4);
1686 rtx r5 = gen_rtx_REG (SImode, 5);
1687 rtx r6 = gen_rtx_REG (SImode, 6);
1689 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1690 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1691 force_into (XEXP (operands[0], 0), r4);
1692 force_into (XEXP (operands[1], 0), r5);
1694 dwords = bytes >> 3;
1695 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1696 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1697 return true;
1699 else
1700 return false;
1702 if (bytes < 64)
1704 char entry[30];
1705 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1706 rtx r4 = gen_rtx_REG (SImode, 4);
1707 rtx r5 = gen_rtx_REG (SImode, 5);
1709 sprintf (entry, "__movmemSI%d", bytes);
1710 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1711 force_into (XEXP (operands[0], 0), r4);
1712 force_into (XEXP (operands[1], 0), r5);
1713 emit_insn (gen_block_move_real (func_addr_rtx));
1714 return true;
2717 /* This is the same number of bytes as a memcpy call, but to a different,
1718 less common function name, so this will occasionally use more space. */
1719 if (! optimize_size)
1721 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1722 int final_switch, while_loop;
1723 rtx r4 = gen_rtx_REG (SImode, 4);
1724 rtx r5 = gen_rtx_REG (SImode, 5);
1725 rtx r6 = gen_rtx_REG (SImode, 6);
1727 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1728 force_into (XEXP (operands[0], 0), r4);
1729 force_into (XEXP (operands[1], 0), r5);
1731 /* r6 controls the size of the move. 16 is decremented from it
1732 for each 64 bytes moved. Then the negative bit left over is used
1733 as an index into a list of move instructions. e.g., a 72 byte move
1734 would be set up with size(r6) = 14, for one iteration through the
1735 big while loop, and a switch of -2 for the last part. */
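/* For instance, a 200 byte move has bytes / 4 = 50, so
   final_switch = 16 - (50 % 16) = 14 and while_loop = (50 / 16 - 1) * 16 = 32,
   giving r6 = 46: three passes through the loop move 3 * 64 = 192 bytes,
   leave r6 = -2, and the switch entry for -2 copies the remaining 8 bytes.  */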
1737 final_switch = 16 - ((bytes / 4) % 16);
1738 while_loop = ((bytes / 4) / 16 - 1) * 16;
1739 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1740 emit_insn (gen_block_lump_real (func_addr_rtx));
1741 return true;
1744 return false;
1747 /* Prepare operands for a move define_expand; specifically, one of the
1748 operands must be in a register. */
1749 void
1750 prepare_move_operands (rtx operands[], enum machine_mode mode)
1752 if ((mode == SImode || mode == DImode)
1753 && flag_pic
1754 && ! ((mode == Pmode || mode == ptr_mode)
1755 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1757 rtx temp;
1758 if (SYMBOLIC_CONST_P (operands[1]))
1760 if (MEM_P (operands[0]))
1761 operands[1] = force_reg (Pmode, operands[1]);
1762 else if (TARGET_SHMEDIA
1763 && GET_CODE (operands[1]) == LABEL_REF
1764 && target_reg_operand (operands[0], mode))
1765 /* It's ok. */;
1766 else
1768 temp = (!can_create_pseudo_p ()
1769 ? operands[0]
1770 : gen_reg_rtx (Pmode));
1771 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1774 else if (GET_CODE (operands[1]) == CONST
1775 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1776 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1778 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1779 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1780 mode, temp);
1781 operands[1] = expand_binop (mode, add_optab, temp,
1782 XEXP (XEXP (operands[1], 0), 1),
1783 (!can_create_pseudo_p ()
1784 ? temp
1785 : gen_reg_rtx (Pmode)),
1786 0, OPTAB_LIB_WIDEN);
1790 if (! reload_in_progress && ! reload_completed)
1792 /* Copy the source to a register if both operands aren't registers. */
1793 if (! register_operand (operands[0], mode)
1794 && ! sh_register_operand (operands[1], mode))
1795 operands[1] = copy_to_mode_reg (mode, operands[1]);
1797 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1799 /* This is like change_address_1 (operands[0], mode, 0, 1),
1800 except that we can't use that function because it is static. */
1801 rtx new_rtx = change_address (operands[0], mode, 0);
1802 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1803 operands[0] = new_rtx;
1806 /* This case can happen while generating code to move the result
1807 of a library call to the target. Reject `st r0,@(rX,rY)' because
1808 reload will fail to find a spill register for rX, since r0 is already
1809 being used for the source. */
1810 else if (TARGET_SH1
1811 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1812 && MEM_P (operands[0])
1813 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1814 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1815 operands[1] = copy_to_mode_reg (mode, operands[1]);
1818 if (mode == Pmode || mode == ptr_mode)
1820 rtx op0, op1, opc;
1821 enum tls_model tls_kind;
1823 op0 = operands[0];
1824 op1 = operands[1];
1825 if (GET_CODE (op1) == CONST
1826 && GET_CODE (XEXP (op1, 0)) == PLUS
1827 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1828 != TLS_MODEL_NONE))
1830 opc = XEXP (XEXP (op1, 0), 1);
1831 op1 = XEXP (XEXP (op1, 0), 0);
1833 else
1834 opc = NULL_RTX;
1836 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1838 rtx tga_op1, tga_ret, tmp, tmp2;
1840 if (! flag_pic
1841 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1842 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1843 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1845 /* Don't schedule insns for getting GOT address when
1846 the first scheduling pass is enabled, to avoid spill
1847 failures for R0. */
1848 if (flag_schedule_insns)
1849 emit_insn (gen_blockage ());
1850 emit_insn (gen_GOTaddr2picreg ());
1851 emit_use (gen_rtx_REG (SImode, PIC_REG));
1852 if (flag_schedule_insns)
1853 emit_insn (gen_blockage ());
1856 switch (tls_kind)
1858 case TLS_MODEL_GLOBAL_DYNAMIC:
1859 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1860 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1861 tmp = gen_reg_rtx (Pmode);
1862 emit_move_insn (tmp, tga_ret);
1863 op1 = tmp;
1864 break;
1866 case TLS_MODEL_LOCAL_DYNAMIC:
1867 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1868 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1870 tmp = gen_reg_rtx (Pmode);
1871 emit_move_insn (tmp, tga_ret);
1873 if (register_operand (op0, Pmode))
1874 tmp2 = op0;
1875 else
1876 tmp2 = gen_reg_rtx (Pmode);
1878 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1879 op1 = tmp2;
1880 break;
1882 case TLS_MODEL_INITIAL_EXEC:
1883 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1884 tmp = gen_sym2GOTTPOFF (op1);
1885 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1886 op1 = tga_op1;
1887 break;
1889 case TLS_MODEL_LOCAL_EXEC:
1890 tmp2 = gen_reg_rtx (Pmode);
1891 emit_insn (gen_store_gbr (tmp2));
1892 tmp = gen_reg_rtx (Pmode);
1893 emit_insn (gen_symTPOFF2reg (tmp, op1));
1895 if (register_operand (op0, Pmode))
1896 op1 = op0;
1897 else
1898 op1 = gen_reg_rtx (Pmode);
1900 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1901 break;
1903 default:
1904 gcc_unreachable ();
1906 if (opc)
1907 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1908 operands[1] = op1;
1913 /* Implement the canonicalize_comparison target hook for the combine
1914 pass. For the target hook this function is invoked via
1915 sh_canonicalize_comparison. This function is also re-used to
1916 canonicalize comparisons in cbranch pattern expanders. */
1917 static void
1918 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1919 enum machine_mode mode,
1920 bool op0_preserve_value ATTRIBUTE_UNUSED)
1922 /* When invoked from within the combine pass the mode is not specified,
1923 so try to get it from one of the operands. */
1924 if (mode == VOIDmode)
1925 mode = GET_MODE (op0);
1926 if (mode == VOIDmode)
1927 mode = GET_MODE (op1);
1929 // We need to have a mode to do something useful here.
1930 if (mode == VOIDmode)
1931 return;
1933 // Currently, we don't deal with floats here.
1934 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1935 return;
1937 // Make sure that the constant operand is the second operand.
1938 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1940 std::swap (op0, op1);
1941 cmp = swap_condition (cmp);
1944 if (CONST_INT_P (op1))
1946 /* Try to adjust the constant operand in such a way that available
1947 comparison insns can be utilized better and the constant can be
1948 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1949 constant pool. */
1950 const HOST_WIDE_INT val = INTVAL (op1);
1952 /* x > -1 --> x >= 0
1953 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1954 x <= -1 --> x < 0
1955 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1956 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1958 cmp = cmp == GT ? GE : LT;
1959 op1 = gen_int_mode (val + 1, mode);
1962 /* x >= 1 --> x > 0
1963 x >= 0x80 --> x > 0x7F
1964 x < 1 --> x <= 0
1965 x < 0x80 --> x <= 0x7F */
1966 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1968 cmp = cmp == GE ? GT : LE;
1969 op1 = gen_int_mode (val - 1, mode);
1972 /* unsigned x >= 1 --> x != 0
1973 unsigned x < 1 --> x == 0 */
1974 else if (val == 1 && (cmp == GEU || cmp == LTU))
1976 cmp = cmp == GEU ? NE : EQ;
1977 op1 = CONST0_RTX (mode);
1980 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1981 unsigned x < 0x80 --> unsigned x <= 0x7F */
1982 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1984 cmp = cmp == GEU ? GTU : LEU;
1985 op1 = gen_int_mode (val - 1, mode);
1988 /* unsigned x > 0 --> x != 0
1989 unsigned x <= 0 --> x == 0 */
1990 else if (val == 0 && (cmp == GTU || cmp == LEU))
1991 cmp = cmp == GTU ? NE : EQ;
1993 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1994 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1995 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1996 && val == 0x7FFFFFFF)
1998 cmp = cmp == GTU ? LT : GE;
1999 op1 = const0_rtx;
2002 /* unsigned x >= 0x80000000 --> signed x < 0
2003 unsigned x < 0x80000000 --> signed x >= 0 */
2004 else if (mode == SImode && (cmp == GEU || cmp == LTU)
2005 && (unsigned HOST_WIDE_INT)val
2006 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
2008 cmp = cmp == GEU ? LT : GE;
2009 op1 = const0_rtx;
2014 /* This function implements the canonicalize_comparison target hook.
2015 This wrapper around the internally used sh_canonicalize_comparison
2016 function is needed to do the enum rtx_code <-> int conversion.
2017 Target hooks cannot use enum rtx_code in their definitions. */
2018 static void
2019 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
2020 bool op0_preserve_value)
2022 enum rtx_code tmp_code = (enum rtx_code)*code;
2023 sh_canonicalize_comparison (tmp_code, *op0, *op1,
2024 VOIDmode, op0_preserve_value);
2025 *code = (int)tmp_code;
2028 enum rtx_code
2029 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
2030 enum rtx_code comparison)
2032 /* The scratch reg is only available when this is invoked from within
2033 the cbranchdi4_i splitter, through expand_cbranchdi4. */
2034 rtx scratch = NULL_RTX;
2036 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2037 comparison = GET_CODE (operands[0]);
2038 else
2039 scratch = operands[4];
2041 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2042 mode, false);
2044 /* Notice that this function is also invoked after reload by
2045 the cbranchdi4_i pattern, through expand_cbranchdi4. */
2046 rtx op1 = operands[1];
2048 if (can_create_pseudo_p ())
2049 operands[1] = force_reg (mode, op1);
2050 /* When we are handling DImode comparisons, we want to keep constants so
2051 that we can optimize the component comparisons; however, memory loads
2052 are better issued as a whole so that they can be scheduled well.
2053 SImode equality comparisons allow I08 constants, but only when they
2054 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2055 into a register, that register might as well be r0, and we allow the
2056 constant. If it is already in a register, this is likely to be
2057 allocated to a different hard register, thus we load the constant into
2058 a register unless it is zero. */
2059 if (!REG_P (operands[2])
2060 && (!CONST_INT_P (operands[2])
2061 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2062 && ((comparison != EQ && comparison != NE)
2063 || (REG_P (op1) && REGNO (op1) != R0_REG)
2064 || !satisfies_constraint_I08 (operands[2])))))
2066 if (scratch && GET_MODE (scratch) == mode)
2068 emit_move_insn (scratch, operands[2]);
2069 operands[2] = scratch;
2071 else if (can_create_pseudo_p ())
2072 operands[2] = force_reg (mode, operands[2]);
2074 return comparison;
2077 void
2078 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2080 rtx (*branch_expander) (rtx) = gen_branch_true;
2081 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2082 switch (comparison)
2084 case NE: case LT: case LE: case LTU: case LEU:
2085 comparison = reverse_condition (comparison);
2086 branch_expander = gen_branch_false;
2087 default: ;
2089 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2090 gen_rtx_fmt_ee (comparison, SImode,
2091 operands[1], operands[2])));
2092 rtx jump = emit_jump_insn (branch_expander (operands[3]));
2093 if (probability >= 0)
2094 add_reg_note (jump, REG_BR_PROB, GEN_INT (probability));
2097 /* ??? How should we distribute probabilities when more than one branch
2098 is generated? So far we only have some ad-hoc observations:
2099 - If the operands are random, they are likely to differ in both parts.
2100 - If comparing items in a hash chain, the operands are random or equal;
2101 operation should be EQ or NE.
2102 - If items are searched in an ordered tree from the root, we can expect
2103 the highpart to be unequal about half of the time; operation should be
2104 an inequality comparison, operands non-constant, and overall probability
2105 about 50%. Likewise for quicksort.
2106 - Range checks will often be made against constants. Even if we assume for
2107 simplicity an even distribution of the non-constant operand over a
2108 sub-range here, the same probability could be generated with differently
2109 wide sub-ranges - as long as the ratio of the part of the subrange that
2110 is before the threshold to the part that comes after the threshold stays
2111 the same. Thus, we can't really tell anything here;
2112 assuming random distribution is at least simple.
2114 bool
2115 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2117 enum rtx_code msw_taken, msw_skip, lsw_taken;
2118 rtx skip_label = NULL_RTX;
2119 rtx op1h, op1l, op2h, op2l;
2120 int num_branches;
2121 int prob, rev_prob;
2122 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2123 rtx scratch = operands[4];
2125 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2126 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2127 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2128 op1l = gen_lowpart (SImode, operands[1]);
2129 op2l = gen_lowpart (SImode, operands[2]);
2130 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2131 prob = split_branch_probability;
2132 rev_prob = REG_BR_PROB_BASE - prob;
2133 switch (comparison)
2135 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2136 That costs 1 cycle more when the first branch can be predicted taken,
2137 but saves us mispredicts because only one branch needs prediction.
2138 It also enables generating the cmpeqdi_t-1 pattern. */
2139 case EQ:
2140 if (TARGET_CMPEQDI_T)
2142 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2143 emit_jump_insn (gen_branch_true (operands[3]));
2144 return true;
2146 msw_skip = NE;
2147 lsw_taken = EQ;
2148 if (prob >= 0)
2150 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
2151 msw_skip_prob = rev_prob;
2152 if (REG_BR_PROB_BASE <= 65535)
2153 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2154 else
2156 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
2157 lsw_taken_prob
2158 = (prob
2159 ? (REG_BR_PROB_BASE
2160 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
2161 / ((HOST_WIDEST_INT) prob << 32)))
2162 : 0);
2165 break;
2166 case NE:
2167 if (TARGET_CMPEQDI_T)
2169 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2170 emit_jump_insn (gen_branch_false (operands[3]));
2171 return true;
2173 msw_taken = NE;
2174 msw_taken_prob = prob;
2175 lsw_taken = NE;
2176 lsw_taken_prob = 0;
2177 break;
2178 case GTU: case GT:
2179 msw_taken = comparison;
2180 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2181 break;
2182 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2183 msw_skip = swap_condition (msw_taken);
2184 lsw_taken = GTU;
2185 break;
2186 case GEU: case GE:
2187 if (op2l == CONST0_RTX (SImode))
2188 msw_taken = comparison;
2189 else
2191 msw_taken = comparison == GE ? GT : GTU;
2192 msw_skip = swap_condition (msw_taken);
2193 lsw_taken = GEU;
2195 break;
2196 case LTU: case LT:
2197 msw_taken = comparison;
2198 if (op2l == CONST0_RTX (SImode))
2199 break;
2200 msw_skip = swap_condition (msw_taken);
2201 lsw_taken = LTU;
2202 break;
2203 case LEU: case LE:
2204 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2205 msw_taken = comparison;
2206 else
2208 lsw_taken = LEU;
2209 if (comparison == LE)
2210 msw_taken = LT;
2211 else if (op2h != CONST0_RTX (SImode))
2212 msw_taken = LTU;
2213 else
2215 msw_skip = swap_condition (LTU);
2216 break;
2218 msw_skip = swap_condition (msw_taken);
2220 break;
2221 default: return false;
2223 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2224 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2225 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2226 if (comparison != EQ && comparison != NE && num_branches > 1)
2228 if (!CONSTANT_P (operands[2])
2229 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2230 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2232 msw_taken_prob = prob / 2U;
2233 msw_skip_prob
2234 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2235 lsw_taken_prob = prob;
2237 else
2239 msw_taken_prob = prob;
2240 msw_skip_prob = REG_BR_PROB_BASE;
2241 /* ??? If we have a constant op2h, should we use that when
2242 calculating lsw_taken_prob? */
2243 lsw_taken_prob = prob;
2246 operands[1] = op1h;
2247 operands[2] = op2h;
2248 operands[4] = NULL_RTX;
2249 if (reload_completed
2250 && ! arith_reg_or_0_operand (op2h, SImode)
2251 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2252 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2253 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2255 emit_move_insn (scratch, operands[2]);
2256 operands[2] = scratch;
2258 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2259 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2260 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2262 rtx taken_label = operands[3];
2264 /* Operands were possibly modified, but msw_skip doesn't expect this.
2265 Always use the original ones. */
2266 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2268 operands[1] = op1h;
2269 operands[2] = op2h;
2270 if (reload_completed
2271 && ! arith_reg_or_0_operand (op2h, SImode)
2272 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2274 emit_move_insn (scratch, operands[2]);
2275 operands[2] = scratch;
2279 operands[3] = skip_label = gen_label_rtx ();
2280 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2281 operands[3] = taken_label;
2283 operands[1] = op1l;
2284 operands[2] = op2l;
2285 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2287 if (reload_completed
2288 && ! arith_reg_or_0_operand (op2l, SImode)
2289 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2291 emit_move_insn (scratch, operands[2]);
2292 operands[2] = scratch;
2294 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2296 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2297 emit_label (skip_label);
2298 return true;
2301 /* Given an operand, return 1 if the evaluated operand plugged into an
2302 if_then_else will result in a branch_true, 0 if branch_false, or
2303 -1 if neither applies. The truth table goes like this:
2305 op | cmpval | code | result
2306 ---------+--------+---------+--------------------
2307 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2308 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2309 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2310 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2311 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2312 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2313 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2314 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2316 sh_eval_treg_value (rtx op)
2318 enum rtx_code code = GET_CODE (op);
2319 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2320 return -1;
2322 int cmpop = code == EQ ? 1 : 0;
2323 int cmpval = INTVAL (XEXP (op, 1));
2324 if (cmpval != 0 && cmpval != 1)
2325 return -1;
2327 int t;
2328 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2329 t = 0;
2330 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2331 t = 1;
2332 else
2333 return -1;
2335 return t ^ (cmpval == cmpop);
2338 /* Emit INSN, possibly in a PARALLEL with an USE of fpscr for SH4. */
2340 static void
2341 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2343 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2345 insn = gen_rtx_PARALLEL (VOIDmode,
2346 gen_rtvec (2, insn,
2347 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2348 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2350 else
2351 emit_insn (insn);
2354 /* Prepare the operands for an scc instruction; make sure that the
2355 compare has been done and the result is in T_REG. */
2356 void
2357 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2359 rtx t_reg = get_t_reg_rtx ();
2360 enum rtx_code oldcode = code;
2361 enum machine_mode mode;
2363 /* First need a compare insn. */
2364 switch (code)
2366 case NE:
2367 /* It isn't possible to handle this case. */
2368 gcc_unreachable ();
2369 case LT:
2370 code = GT;
2371 break;
2372 case LE:
2373 code = GE;
2374 break;
2375 case LTU:
2376 code = GTU;
2377 break;
2378 case LEU:
2379 code = GEU;
2380 break;
2381 default:
2382 break;
2384 if (code != oldcode)
2386 rtx tmp = op0;
2387 op0 = op1;
2388 op1 = tmp;
2391 mode = GET_MODE (op0);
2392 if (mode == VOIDmode)
2393 mode = GET_MODE (op1);
2395 op0 = force_reg (mode, op0);
2396 if ((code != EQ && code != NE
2397 && (op1 != const0_rtx
2398 || code == GTU || code == GEU || code == LTU || code == LEU))
2399 || (mode == DImode && op1 != const0_rtx)
2400 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2401 op1 = force_reg (mode, op1);
2403 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2404 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2405 mode);
2409 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2410 rtx op0, rtx op1)
2412 rtx target = gen_reg_rtx (SImode);
2413 rtx tmp;
2415 gcc_assert (TARGET_SHMEDIA);
2416 switch (code)
2418 case EQ:
2419 case GT:
2420 case LT:
2421 case UNORDERED:
2422 case GTU:
2423 case LTU:
2424 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2425 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2426 code = NE;
2427 break;
2429 case NE:
2430 case GE:
2431 case LE:
2432 case ORDERED:
2433 case GEU:
2434 case LEU:
2435 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2436 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2437 code = EQ;
2438 break;
2440 case UNEQ:
2441 case UNGE:
2442 case UNGT:
2443 case UNLE:
2444 case UNLT:
2445 case LTGT:
2446 return NULL_RTX;
2448 default:
2449 gcc_unreachable ();
2452 if (mode == DImode)
2454 rtx t2 = gen_reg_rtx (DImode);
2455 emit_insn (gen_extendsidi2 (t2, target));
2456 target = t2;
2459 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2462 /* Called from the md file, set up the operands of a compare instruction. */
2463 void
2464 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2466 enum rtx_code code = GET_CODE (operands[0]);
2467 enum rtx_code branch_code;
2468 rtx op0 = operands[1];
2469 rtx op1 = operands[2];
2470 rtx insn, tem;
2471 bool need_ccmpeq = false;
2473 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2475 op0 = force_reg (mode, op0);
2476 op1 = force_reg (mode, op1);
2478 else
2480 if (code != EQ || mode == DImode)
2482 /* Force args into regs, since we can't use constants here. */
2483 op0 = force_reg (mode, op0);
2484 if (op1 != const0_rtx || code == GTU || code == GEU)
2485 op1 = force_reg (mode, op1);
2489 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2491 if (code == LT
2492 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2493 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2495 tem = op0, op0 = op1, op1 = tem;
2496 code = swap_condition (code);
2499 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2500 if (code == GE)
2502 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2503 need_ccmpeq = true;
2504 code = GT;
2507 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2508 to EQ/GT respectively. */
2509 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2512 switch (code)
2514 case EQ:
2515 case GT:
2516 case GE:
2517 case GTU:
2518 case GEU:
2519 branch_code = code;
2520 break;
2521 case NE:
2522 case LT:
2523 case LE:
2524 case LTU:
2525 case LEU:
2526 branch_code = reverse_condition (code);
2527 break;
2528 default:
2529 gcc_unreachable ();
2532 insn = gen_rtx_SET (VOIDmode,
2533 get_t_reg_rtx (),
2534 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2536 sh_emit_set_t_insn (insn, mode);
2537 if (need_ccmpeq)
2538 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2540 if (branch_code == code)
2541 emit_jump_insn (gen_branch_true (operands[3]));
2542 else
2543 emit_jump_insn (gen_branch_false (operands[3]));
2546 void
2547 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2549 enum rtx_code code = GET_CODE (operands[1]);
2550 rtx op0 = operands[2];
2551 rtx op1 = operands[3];
2552 rtx lab = NULL_RTX;
2553 bool invert = false;
2554 rtx tem;
2556 op0 = force_reg (mode, op0);
2557 if ((code != EQ && code != NE
2558 && (op1 != const0_rtx
2559 || code == GTU || code == GEU || code == LTU || code == LEU))
2560 || (mode == DImode && op1 != const0_rtx)
2561 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2562 op1 = force_reg (mode, op1);
2564 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2566 if (code == LT || code == LE)
2568 code = swap_condition (code);
2569 tem = op0, op0 = op1, op1 = tem;
2571 if (code == GE)
2573 if (TARGET_IEEE)
2575 lab = gen_label_rtx ();
2576 sh_emit_scc_to_t (EQ, op0, op1);
2577 emit_jump_insn (gen_branch_true (lab));
2578 code = GT;
2580 else
2582 code = LT;
2583 invert = true;
2588 if (code == NE)
2590 code = EQ;
2591 invert = true;
2594 sh_emit_scc_to_t (code, op0, op1);
2595 if (lab)
2596 emit_label (lab);
2597 if (invert)
2598 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2599 else
2600 emit_move_insn (operands[0], get_t_reg_rtx ());
2603 /* Functions to output assembly code. */
2605 /* Return a sequence of instructions to perform DI or DF move.
2607 Since the SH cannot move a DI or DF in one instruction, we have
2608 to take care when we see overlapping source and dest registers. */
2609 const char *
2610 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2611 enum machine_mode mode)
2613 rtx dst = operands[0];
2614 rtx src = operands[1];
2616 if (MEM_P (dst)
2617 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2618 return "mov.l %T1,%0" "\n"
2619 " mov.l %1,%0";
2621 if (register_operand (dst, mode)
2622 && register_operand (src, mode))
2624 if (REGNO (src) == MACH_REG)
2625 return "sts mach,%S0" "\n"
2626 " sts macl,%R0";
2628 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2629 when mov.d r1,r0 do r1->r0 then r2->r1. */
2630 if (REGNO (src) + 1 == REGNO (dst))
2631 return "mov %T1,%T0" "\n"
2632 " mov %1,%0";
2633 else
2634 return "mov %1,%0" "\n"
2635 " mov %T1,%T0";
2637 else if (CONST_INT_P (src))
2639 if (INTVAL (src) < 0)
2640 output_asm_insn ("mov #-1,%S0", operands);
2641 else
2642 output_asm_insn ("mov #0,%S0", operands);
2644 return "mov %1,%R0";
2646 else if (MEM_P (src))
2648 int ptrreg = -1;
2649 int dreg = REGNO (dst);
2650 rtx inside = XEXP (src, 0);
2652 switch (GET_CODE (inside))
2654 case REG:
2655 ptrreg = REGNO (inside);
2656 break;
2658 case SUBREG:
2659 ptrreg = subreg_regno (inside);
2660 break;
2662 case PLUS:
2663 ptrreg = REGNO (XEXP (inside, 0));
2664 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2665 an offsettable address. Unfortunately, offsettable addresses use
2666 QImode to check the offset, and a QImode offsettable address
2667 requires r0 for the other operand, which is not currently
2668 supported, so we can't use the 'o' constraint.
2669 Thus we must check for and handle r0+REG addresses here.
2670 We punt for now, since this is likely very rare. */
2671 gcc_assert (!REG_P (XEXP (inside, 1)));
2672 break;
2674 case LABEL_REF:
2675 return "mov.l %1,%0" "\n"
2676 " mov.l %1+4,%T0";
2677 case POST_INC:
2678 return "mov.l %1,%0" "\n"
2679 " mov.l %1,%T0";
2680 default:
2681 gcc_unreachable ();
2684 /* Work out the safe way to copy. Copy into the second half first. */
2685 if (dreg == ptrreg)
2686 return "mov.l %T1,%T0" "\n"
2687 " mov.l %1,%0";
2690 return "mov.l %1,%0" "\n"
2691 " mov.l %T1,%T0";
2694 /* Print an instruction which would have gone into a delay slot after
2695 another instruction, but couldn't because the other instruction expanded
2696 into a sequence where putting the slot insn at the end wouldn't work. */
2697 static void
2698 print_slot (rtx insn)
2700 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2702 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2705 const char *
2706 output_far_jump (rtx insn, rtx op)
2708 struct { rtx lab, reg, op; } this_jmp;
2709 rtx braf_base_lab = NULL_RTX;
2710 const char *jump;
2711 int far;
2712 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2713 rtx prev;
2715 this_jmp.lab = gen_label_rtx ();
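/* The mov.w / braf sequence below loads a 16 bit sign extended displacement,
   so it can only reach targets within roughly +-32 KB; the bounds below leave
   some slack for the length of the jump sequence itself.  */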
2717 if (TARGET_SH2
2718 && offset >= -32764
2719 && offset - get_attr_length (insn) <= 32766)
2721 far = 0;
2722 jump = "mov.w %O0,%1" "\n"
2723 " braf %1";
2725 else
2727 far = 1;
2728 if (flag_pic)
2730 if (TARGET_SH2)
2731 jump = "mov.l %O0,%1" "\n"
2732 " braf %1";
2733 else
2734 jump = "mov.l r0,@-r15" "\n"
2735 " mova %O0,r0" "\n"
2736 " mov.l @r0,%1" "\n"
2737 " add r0,%1" "\n"
2738 " mov.l @r15+,r0" "\n"
2739 " jmp @%1";
2741 else
2742 jump = "mov.l %O0,%1" "\n"
2743 " jmp @%1";
2745 /* If we have a scratch register available, use it. */
2746 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2747 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2749 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2750 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2751 jump = "mov.l r1,@-r15" "\n"
2752 " mova %O0,r0" "\n"
2753 " mov.l @r0,r1" "\n"
2754 " add r1,r0" "\n"
2755 " mov.l @r15+,r1" "\n"
2756 " jmp @%1";
2757 output_asm_insn (jump, &this_jmp.lab);
2758 if (dbr_sequence_length ())
2759 print_slot (final_sequence);
2760 else
2761 output_asm_insn ("nop", 0);
2763 else
2765 /* Output the delay slot insn first if any. */
2766 if (dbr_sequence_length ())
2767 print_slot (final_sequence);
2769 this_jmp.reg = gen_rtx_REG (SImode, 13);
2770 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2771 Fortunately, MACL is fixed and call-clobbered, and we never
2772 need its value across jumps, so save r13 in it instead of in
2773 the stack. */
2774 if (TARGET_SH5)
2775 output_asm_insn ("lds r13,macl", 0);
2776 else
2777 output_asm_insn ("mov.l r13,@-r15", 0);
2778 output_asm_insn (jump, &this_jmp.lab);
2779 if (TARGET_SH5)
2780 output_asm_insn ("sts macl,r13", 0);
2781 else
2782 output_asm_insn ("mov.l @r15+,r13", 0);
2784 if (far && flag_pic && TARGET_SH2)
2786 braf_base_lab = gen_label_rtx ();
2787 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2788 CODE_LABEL_NUMBER (braf_base_lab));
2790 if (far)
2791 output_asm_insn (".align 2", 0);
2792 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2793 this_jmp.op = op;
2794 if (far && flag_pic)
2796 if (TARGET_SH2)
2797 this_jmp.lab = braf_base_lab;
2798 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2800 else
2801 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2802 return "";
2805 /* Local label counter, used for constants in the pool and inside
2806 pattern branches. */
2807 static int lf = 100;
2809 /* Output code for ordinary branches. */
2810 const char *
2811 output_branch (int logic, rtx insn, rtx *operands)
2813 switch (get_attr_length (insn))
2815 case 6:
2816 /* This can happen if filling the delay slot has caused a forward
2817 branch to exceed its range (we could reverse it, but only
2818 when we know we won't overextend other branches; this should
2819 best be handled by relaxation).
2820 It can also happen when other condbranches hoist delay slot insn
2821 from their destination, thus leading to code size increase.
2822 But the branch will still be in the range -4092..+4098 bytes. */
2823 if (! TARGET_RELAX)
2825 int label = lf++;
2826 /* The call to print_slot will clobber the operands. */
2827 rtx op0 = operands[0];
2829 /* If the instruction in the delay slot is annulled (true), then
2830 there is no delay slot where we can put it now. The only safe
2831 place for it is after the label. final will do that by default. */
2833 if (final_sequence
2834 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2835 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2837 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2838 ASSEMBLER_DIALECT ? "/" : ".", label);
2839 print_slot (final_sequence);
2841 else
2842 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2844 output_asm_insn ("bra\t%l0", &op0);
2845 fprintf (asm_out_file, "\tnop\n");
2846 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2848 return "";
2850 /* When relaxing, handle this like a short branch. The linker
2851 will fix it up if it still doesn't fit after relaxation. */
2852 case 2:
2853 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2855 /* These are for SH2e, in which we have to account for the
2856 extra nop because of the hardware bug in annulled branches. */
2857 case 8:
2858 if (! TARGET_RELAX)
2860 int label = lf++;
2862 gcc_assert (!final_sequence
2863 || !(INSN_ANNULLED_BRANCH_P
2864 (XVECEXP (final_sequence, 0, 0))));
2865 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2866 logic ? "f" : "t",
2867 ASSEMBLER_DIALECT ? "/" : ".", label);
2868 fprintf (asm_out_file, "\tnop\n");
2869 output_asm_insn ("bra\t%l0", operands);
2870 fprintf (asm_out_file, "\tnop\n");
2871 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2873 return "";
2875 /* When relaxing, fall through. */
2876 case 4:
2878 char buffer[10];
2880 sprintf (buffer, "b%s%ss\t%%l0",
2881 logic ? "t" : "f",
2882 ASSEMBLER_DIALECT ? "/" : ".");
2883 output_asm_insn (buffer, &operands[0]);
2884 return "nop";
2887 default:
2888 /* There should be no longer branches now - that would
2889 indicate that something has destroyed the branches set
2890 up in machine_dependent_reorg. */
2891 gcc_unreachable ();
2895 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2896 fill in operands[9] as a label to the successor insn.
2897 We try to use jump threading where possible.
2898 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2899 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2900 follow jmp and bt, if the address is in range. */
2901 const char *
2902 output_branchy_insn (enum rtx_code code, const char *templ,
2903 rtx insn, rtx *operands)
2905 rtx next_insn = NEXT_INSN (insn);
2907 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2909 rtx src = SET_SRC (PATTERN (next_insn));
2910 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2912 /* Following branch not taken */
2913 operands[9] = gen_label_rtx ();
2914 emit_label_after (operands[9], next_insn);
2915 INSN_ADDRESSES_NEW (operands[9],
2916 INSN_ADDRESSES (INSN_UID (next_insn))
2917 + get_attr_length (next_insn));
2918 return templ;
2920 else
2922 int offset = (branch_dest (next_insn)
2923 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2924 if (offset >= -252 && offset <= 258)
2926 if (GET_CODE (src) == IF_THEN_ELSE)
2927 /* branch_true */
2928 src = XEXP (src, 1);
2929 operands[9] = src;
2930 return templ;
2934 operands[9] = gen_label_rtx ();
2935 emit_label_after (operands[9], insn);
2936 INSN_ADDRESSES_NEW (operands[9],
2937 INSN_ADDRESSES (INSN_UID (insn))
2938 + get_attr_length (insn));
2939 return templ;
2942 const char *
2943 output_ieee_ccmpeq (rtx insn, rtx *operands)
2945 return output_branchy_insn (NE, "bt %l9" "\n"
2946 " fcmp/eq %1,%0",
2947 insn, operands);
2950 /* Output the start of the assembler file. */
2951 static void
2952 sh_file_start (void)
2954 default_file_start ();
2956 if (TARGET_ELF)
2957 /* We need to show the text section with the proper
2958 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2959 emits it without attributes, else GAS
2960 will complain. We can teach GAS specifically about the
2961 default attributes for our choice of text section, but
2962 then we would have to change GAS again if/when we change
2963 the text section name. */
2964 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2965 else
2966 /* Switch to the data section so that the coffsem symbol
2967 isn't in the text section. */
2968 switch_to_section (data_section);
2970 if (TARGET_LITTLE_ENDIAN)
2971 fputs ("\t.little\n", asm_out_file);
2973 if (!TARGET_ELF)
2975 if (TARGET_SHCOMPACT)
2976 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2977 else if (TARGET_SHMEDIA)
2978 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2979 TARGET_SHMEDIA64 ? 64 : 32);
2983 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2984 static bool
2985 unspec_caller_rtx_p (rtx pat)
2987 rtx base, offset;
2988 int i;
2990 split_const (pat, &base, &offset);
2991 if (GET_CODE (base) == UNSPEC)
2993 if (XINT (base, 1) == UNSPEC_CALLER)
2994 return true;
2995 for (i = 0; i < XVECLEN (base, 0); i++)
2996 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2997 return true;
2999 return false;
3002 /* Indicate that INSN cannot be duplicated. This is true for an insn
3003 that generates a unique label. */
3004 static bool
3005 sh_cannot_copy_insn_p (rtx insn)
3007 rtx pat;
3009 if (!reload_completed || !flag_pic)
3010 return false;
3012 if (!NONJUMP_INSN_P (insn))
3013 return false;
3014 if (asm_noperands (insn) >= 0)
3015 return false;
3017 pat = PATTERN (insn);
3018 if (GET_CODE (pat) != SET)
3019 return false;
3020 pat = SET_SRC (pat);
3022 if (unspec_caller_rtx_p (pat))
3023 return true;
3025 return false;
3028 /* Number of instructions used to make an arithmetic right shift by N. */
3029 static const char ashiftrt_insns[] =
3030 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
3032 /* Description of a logical left or right shift, when expanded to a sequence
3033 of 1/2/8/16 shifts.
3034 Notice that one bit right shifts clobber the T bit. One bit left shifts
3035 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
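/* For example, a shift by 13 uses the entry { 4, { 8, 2, 1, 2 } }, i.e.
   shifts by 8, 2, 1 and 2 (8 + 2 + 1 + 2 = 13); it is marked LSHR_CLOBBERS_T
   because the right shift variant needs a one bit shlr.  Negative amounts,
   as in { 3, { 8, -2, 8 } } for 14, denote a shift in the opposite direction;
   gen_ashift below flips the shift type for them.  */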
3036 enum
3038 ASHL_CLOBBERS_T = 1 << 0,
3039 LSHR_CLOBBERS_T = 1 << 1
3042 struct ashl_lshr_sequence
3044 char insn_count;
3045 char amount[6];
3046 char clobbers_t;
3049 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
3051 { 0, { 0 }, 0 }, // 0
3052 { 1, { 1 }, LSHR_CLOBBERS_T },
3053 { 1, { 2 }, 0 },
3054 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3055 { 2, { 2, 2 }, 0 }, // 4
3056 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3057 { 3, { 2, 2, 2 }, 0 },
3058 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3059 { 1, { 8 }, 0 }, // 8
3060 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3061 { 2, { 8, 2 }, 0 },
3062 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3063 { 3, { 8, 2, 2 }, 0 }, // 12
3064 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3065 { 3, { 8, -2, 8 }, 0 },
3066 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3067 { 1, { 16 }, 0 }, // 16
3068 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3069 { 2, { 16, 2 }, 0 },
3070 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3071 { 3, { 16, 2, 2 }, 0 }, // 20
3072 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3073 { 3, { 16, -2, 8 }, 0 },
3074 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3075 { 2, { 16, 8 }, 0 }, // 24
3076 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3077 { 3, { 16, 8, 2 }, 0 },
3078 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3079 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3080 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3081 { 3, { 16, -2, 16 }, 0 },
3083 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3084 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3085 However, the shift-and combiner code needs this entry here to be in
3086 terms of real shift insns. */
3087 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3090 /* Alternative shift sequences for shift amounts < 16, in which up to three of
3091 the highmost bits might be clobbered. This is typically used when combined with some
3092 kind of sign or zero extension. */
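/* E.g. for a shift amount of 6 this table uses { 2, { 8, -2 } }, one insn
   shorter than the exact { 3, { 2, 2, 2 } } entry in ashl_lshr_seq, at the
   cost of clobbering the two highmost bits of a left shift result.  */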
3093 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3095 { 0, { 0 }, 0 }, // 0
3096 { 1, { 1 }, LSHR_CLOBBERS_T },
3097 { 1, { 2 }, 0 },
3098 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3099 { 2, { 2, 2 }, 0 }, // 4
3100 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3101 { 2, { 8, -2 }, 0 },
3102 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3103 { 1, { 8 }, 0 }, // 8
3104 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3105 { 2, { 8, 2 }, 0 },
3106 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3107 { 3, { 8, 2, 2 }, 0 }, // 12
3108 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3109 { 2, { 16, -2 }, 0 },
3110 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3111 { 1, { 16 }, 0 }, // 16
3112 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3113 { 2, { 16, 2 }, 0 },
3114 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3115 { 3, { 16, 2, 2 }, 0 }, // 20
3116 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3117 { 3, { 16, -2, 8 }, 0 },
3118 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3119 { 2, { 16, 8 }, 0 }, // 24
3120 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3121 { 3, { 16, 8, 2 }, 0 },
3122 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3123 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3124 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3125 { 3, { 16, -2, 16 }, 0 },
3126 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3129 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3130 will clobber the T bit. */
3131 bool
3132 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3134 gcc_assert (CONST_INT_P (shift_amount));
3136 const int shift_amount_i = INTVAL (shift_amount) & 31;
3138 /* Special case for shift count of 31: use and-rotl sequence. */
3139 if (shift_amount_i == 31)
3140 return true;
3142 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3143 & ASHL_CLOBBERS_T) != 0;
3146 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3147 instructions will clobber the T bit. */
3148 bool
3149 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3151 gcc_assert (CONST_INT_P (shift_amount));
3153 const int shift_amount_i = INTVAL (shift_amount) & 31;
3155 /* Special case for shift count of 31: use shll-movt sequence. */
3156 if (shift_amount_i == 31)
3157 return true;
3159 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3160 & LSHR_CLOBBERS_T) != 0;
3163 /* Return true if it is potentially beneficial to use a dynamic shift
3164 instruction (shad / shar) instead of a combination of 1/2/8/16
3165 shift instructions for the specified shift count.
3166 If dynamic shifts are not available, always return false. */
3167 bool
3168 sh_dynamicalize_shift_p (rtx count)
3170 gcc_assert (CONST_INT_P (count));
3172 const int shift_amount_i = INTVAL (count) & 31;
3173 int insn_count;
3175 /* For left and right shifts, there are shorter 2 insn sequences for
3176 shift amounts of 31. */
3177 if (shift_amount_i == 31)
3178 insn_count = 2;
3179 else
3180 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3182 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3185 /* Assuming we have a value that has been sign-extended by at least one bit,
3186 can we use ext_ashl_lshr_seq with the last shift turned to an
3187 arithmetic shift to shift it by N without data loss, and quicker than by
3188 other means? */
3189 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3191 /* Return the cost of a shift. */
3192 static inline int
3193 shiftcosts (rtx x)
3195 int value;
3197 if (TARGET_SHMEDIA)
3198 return 1;
3200 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3202 if (GET_MODE (x) == DImode
3203 && CONST_INT_P (XEXP (x, 1))
3204 && INTVAL (XEXP (x, 1)) == 1)
3205 return 2;
3207 /* Everything else is invalid, because there is no pattern for it. */
3208 return -1;
3210 /* If shift by a non constant, then this will be expensive. */
3211 if (!CONST_INT_P (XEXP (x, 1)))
3212 return SH_DYNAMIC_SHIFT_COST;
3214 /* Otherwise, return the true cost in instructions. Cope with out of range
3215 shift counts more or less arbitrarily. */
3216 value = INTVAL (XEXP (x, 1)) & 31;
3218 if (GET_CODE (x) == ASHIFTRT)
3220 int cost = ashiftrt_insns[value];
3221 /* If dynamic shifts are available and profitable in this case, then we
3222 put the constant in a reg and use shad. */
3223 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3224 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3225 return cost;
3227 else
3228 return ashl_lshr_seq[value].insn_count;
3231 /* Return the cost of an AND/XOR/IOR operation. */
3232 static inline int
3233 and_xor_ior_costs (rtx x, int code)
3235 /* On SH1-4 we only have operations up to SImode width.
3236 Double the cost for modes > SImode. */
3237 const int cost_scale = !TARGET_SHMEDIA
3238 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3239 ? 2 : 1;
3241 /* A logical operation with two registers is a single cycle
3242 instruction. */
3243 if (!CONST_INT_P (XEXP (x, 1)))
3244 return 1 * cost_scale;
3246 int i = INTVAL (XEXP (x, 1));
3248 if (TARGET_SHMEDIA)
3250 if (satisfies_constraint_I10 (XEXP (x, 1))
3251 || satisfies_constraint_J16 (XEXP (x, 1)))
3252 return 1;
3253 else
3254 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3257 /* These constants are single cycle extu.[bw] instructions. */
3258 if ((i == 0xff || i == 0xffff) && code == AND)
3259 return 1 * cost_scale;
3260 /* Constants that can be used in an instruction as an immediate are
3261 a single cycle, but this requires r0, so make it a little more
3262 expensive. */
3263 if (CONST_OK_FOR_K08 (i))
3264 return 2 * cost_scale;
3265 /* Constants that can be loaded with a mov immediate need one more cycle.
3266 This case is probably unnecessary. */
3267 if (CONST_OK_FOR_I08 (i))
3268 return 2 * cost_scale;
3269 /* Any other constant requires an additional 2 cycle pc-relative load.
3270 This case is probably unnecessary. */
3271 return 3 * cost_scale;
3274 /* Return the cost of an addition or a subtraction. */
3275 static inline int
3276 addsubcosts (rtx x)
3278 /* On SH1-4 we only have operations up to SImode width.
3279 Double the cost for modes > SImode. */
3280 const int cost_scale = !TARGET_SHMEDIA
3281 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3282 ? 2 : 1;
3284 /* Adding a register is a single cycle insn. */
3285 if (REG_P (XEXP (x, 1))
3286 || GET_CODE (XEXP (x, 1)) == SUBREG)
3287 return 1 * cost_scale;
3289 /* Likewise for small constants. */
3290 if (CONST_INT_P (XEXP (x, 1))
3291 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3292 return 1 * cost_scale;
3294 if (TARGET_SHMEDIA)
3295 switch (GET_CODE (XEXP (x, 1)))
3297 case CONST:
3298 case LABEL_REF:
3299 case SYMBOL_REF:
3300 return TARGET_SHMEDIA64 ? 5 : 3;
3302 case CONST_INT:
3303 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3304 return 2;
3305 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3306 return 3;
3307 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3308 return 4;
3310 /* Fall through. */
3311 default:
3312 return 5;
3315 /* Any other constant requires a 2 cycle pc-relative load plus an
3316 addition. */
3317 return 3 * cost_scale;
3320 /* Return the cost of a multiply. */
3321 static inline int
3322 multcosts (rtx x ATTRIBUTE_UNUSED)
3324 if (sh_multcost >= 0)
3325 return sh_multcost;
3326 if (TARGET_SHMEDIA)
3327 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3328 accept constants. Ideally, we would use a cost of one or two and
3329 add the cost of the operand, but disregard the latter when inside loops
3330 and loop invariant code motion is still to follow.
3331 Using a multiply first and splitting it later if it's a loss
3332 doesn't work because of different sign / zero extension semantics
3333 of multiplies vs. shifts. */
3334 return optimize_size ? 2 : 3;
3336 if (TARGET_SH2)
3338 /* We have a mul insn, so we can never take more than the mul and the
3339 read of the mac reg, but count more because of the latency and extra
3340 reg usage. */
3341 if (optimize_size)
3342 return 2;
3343 return 3;
3346 /* If we're aiming at small code, then just count the number of
3347 insns in a multiply call sequence. */
3348 if (optimize_size)
3349 return 5;
3351 /* Otherwise count all the insns in the routine we'd be calling too. */
3352 return 20;
3355 /* Compute a (partial) cost for rtx X. Return true if the complete
3356 cost has been computed, and false if subexpressions should be
3357 scanned. In either case, *TOTAL contains the cost result. */
3358 static bool
3359 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3360 int *total, bool speed ATTRIBUTE_UNUSED)
3362 switch (code)
3364 /* The lower-subreg pass decides whether to split multi-word regs
3365 into individual regs by looking at the cost for a SET of certain
3366 modes with the following patterns:
3367 (set (reg) (reg))
3368 (set (reg) (const_int 0))
3369 On machines that support vector-move operations a multi-word move
3370 is the same cost as individual reg move. On SH there is no
3371 vector-move, so we have to provide the correct cost in the number
3372 of move insns to load/store the reg of the mode in question. */
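/* E.g. under this rule a DImode reg-reg SET costs COSTS_N_INSNS (8 / 4),
   i.e. two move insns, while DFmode costs a single insn when TARGET_FMOVD
   allows 8 byte moves (see mov_insn_size below).  */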
3373 case SET:
3374 if (register_operand (SET_DEST (x), VOIDmode)
3375 && (register_operand (SET_SRC (x), VOIDmode)
3376 || satisfies_constraint_Z (SET_SRC (x))))
3378 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3379 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3380 / mov_insn_size (mode, TARGET_SH2A));
3381 return true;
3383 return false;
3385 /* The cost of a mem access is mainly the cost of the address mode. */
3386 case MEM:
3387 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3388 true);
3389 return true;
3391 /* The cost of a sign or zero extend depends on whether the source is a
3392 reg or a mem. In case of a mem take the address into account. */
3393 case SIGN_EXTEND:
3394 if (REG_P (XEXP (x, 0)))
3396 *total = COSTS_N_INSNS (1);
3397 return true;
3399 if (MEM_P (XEXP (x, 0)))
3401 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3402 GET_MODE (XEXP (x, 0)),
3403 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3404 return true;
3406 return false;
3408 case ZERO_EXTEND:
3409 if (REG_P (XEXP (x, 0)))
3411 *total = COSTS_N_INSNS (1);
3412 return true;
3414 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3415 && (GET_MODE (XEXP (x, 0)) == QImode
3416 || GET_MODE (XEXP (x, 0)) == HImode))
3418 /* Handle SH2A's movu.b and movu.w insn. */
3419 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3420 GET_MODE (XEXP (x, 0)),
3421 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3422 return true;
3424 return false;
3426 /* mems for SFmode and DFmode can be inside a parallel due to
3427 the way the fpscr is handled. */
3428 case PARALLEL:
3429 for (int i = 0; i < XVECLEN (x, 0); i++)
3431 rtx xx = XVECEXP (x, 0, i);
3432 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3434 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3435 GET_MODE (XEXP (xx, 0)),
3436 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3437 return true;
3439 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3441 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3442 GET_MODE (XEXP (xx, 1)),
3443 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3444 return true;
3448 if (sh_1el_vec (x, VOIDmode))
3449 *total = outer_code != SET;
3450 else if (sh_rep_vec (x, VOIDmode))
3451 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3452 + (outer_code != SET));
3453 else
3454 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3455 return true;
3457 case CONST_INT:
3458 if (TARGET_SHMEDIA)
3460 if (INTVAL (x) == 0)
3461 *total = 0;
3462 else if (outer_code == AND && and_operand ((x), DImode))
3463 *total = 0;
3464 else if ((outer_code == IOR || outer_code == XOR
3465 || outer_code == PLUS)
3466 && CONST_OK_FOR_I10 (INTVAL (x)))
3467 *total = 0;
3468 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3469 *total = COSTS_N_INSNS (outer_code != SET);
3470 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3471 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3472 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3473 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3474 else
3475 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3476 return true;
3478 if (CONST_OK_FOR_I08 (INTVAL (x)))
3479 *total = 0;
3480 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3481 && CONST_OK_FOR_K08 (INTVAL (x)))
3482 *total = 1;
3483 /* prepare_cmp_insn will force costly constants into registers before
3484 the cbranch[sd]i4 patterns can see them, so preserve potentially
3485 interesting ones not covered by I08 above. */
3486 else if (outer_code == COMPARE
3487 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3488 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3489 || INTVAL (x) == 0x7fffffff
3490 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3491 *total = 1;
3492 else
3493 *total = 8;
3494 return true;
3496 case EQ:
3497 /* An and with a constant compared against zero is
3498 most likely going to be a TST #imm, R0 instruction.
3499 Notice that this does not catch the zero_extract variants from
3500 the md file. */
3501 if (GET_CODE (XEXP (x, 0)) == AND
3502 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3504 *total = 1;
3505 return true;
3507 else
3508 return false;
3510 case CONST:
3511 case LABEL_REF:
3512 case SYMBOL_REF:
3513 if (TARGET_SHMEDIA64)
3514 *total = COSTS_N_INSNS (4);
3515 else if (TARGET_SHMEDIA32)
3516 *total = COSTS_N_INSNS (2);
3517 else
3518 *total = 5;
3519 return true;
3521 case CONST_DOUBLE:
3522 if (TARGET_SHMEDIA)
3523 *total = COSTS_N_INSNS (4);
3524 /* prepare_cmp_insn will force costly constants into registers before
3525 the cbranchdi4 pattern can see them, so preserve potentially
3526 interesting ones. */
3527 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3528 *total = 1;
3529 else
3530 *total = 10;
3531 return true;
3533 case CONST_VECTOR:
3534 /* FIXME: This looks broken. Only the last statement has any effect.
3535 Probably this could be folded with the PARALLEL case? */
3536 if (x == CONST0_RTX (GET_MODE (x)))
3537 *total = 0;
3538 else if (sh_1el_vec (x, VOIDmode))
3539 *total = outer_code != SET;
3540 if (sh_rep_vec (x, VOIDmode))
3541 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3542 + (outer_code != SET));
3543 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3544 return true;
3546 case PLUS:
3547 case MINUS:
3548 *total = COSTS_N_INSNS (addsubcosts (x));
3549 return true;
3551 case AND:
3552 case XOR:
3553 case IOR:
3554 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3555 return true;
3557 case MULT:
3558 *total = COSTS_N_INSNS (multcosts (x));
3559 return true;
3561 case LT:
3562 case GE:
3563 /* div0s sign comparison. */
3564 if (GET_CODE (XEXP (x, 0)) == XOR
3565 && REG_P ((XEXP (XEXP (x, 0), 0)))
3566 && REG_P ((XEXP (XEXP (x, 0), 1)))
3567 && satisfies_constraint_Z (XEXP (x, 1)))
3569 *total = COSTS_N_INSNS (1);
3570 return true;
3572 else
3573 return false;
3575 case LSHIFTRT:
3576 /* div0s sign comparison. */
3577 if (GET_CODE (XEXP (x, 0)) == XOR
3578 && REG_P ((XEXP (XEXP (x, 0), 0)))
3579 && REG_P ((XEXP (XEXP (x, 0), 1)))
3580 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3582 *total = COSTS_N_INSNS (1);
3583 return true;
3585 /* Fall through to shiftcosts. */
3586 case ASHIFT:
3587 case ASHIFTRT:
3589 int cost = shiftcosts (x);
3590 if (cost < 0)
3591 return false;
3592 *total = COSTS_N_INSNS (cost);
3593 return true;
3596 case DIV:
3597 case UDIV:
3598 case MOD:
3599 case UMOD:
3600 *total = COSTS_N_INSNS (20);
3601 return true;
3603 case FLOAT:
3604 case FIX:
3605 *total = 100;
3606 return true;
3608 default:
3609 return false;
3613 /* Determine the size of the fundamental move insn that will be used
3614 for the specified mode. */
3615 static inline int
3616 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3618 const int mode_sz = GET_MODE_SIZE (mode);
3620 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3621 || (TARGET_FMOVD && mode == DFmode))
3622 return mode_sz;
3623 else
3625 /* The max. available mode for actual move insns is SImode.
3626 Larger accesses will be split into multiple loads/stores. */
3627 const int max_mov_sz = GET_MODE_SIZE (SImode);
3628 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3632 /* Determine the maximum possible displacement for a move insn for the
3633 specified mode. */
3635 sh_max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
3637 /* The 4 byte displacement move insns are the same as the 2 byte
3638 versions but take a 12 bit displacement. All we need to do is to
3639 scale the max. displacement value accordingly. */
3640 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3642 /* SH2A supports FPU move insns with 12 bit displacements.
3643 Other variants do not support any kind of displacements for
3644 FPU move insns. */
3645 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3646 return 0;
3647 else
3649 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3650 const int mode_sz = GET_MODE_SIZE (mode);
3651 int r = 15 * mov_insn_sz * disp_scale;
3653 /* If the mov insn will be split into multiple loads/stores, the
3654 maximum possible displacement is a bit smaller. */
3655 if (mode_sz > mov_insn_sz)
3656 r -= mode_sz - mov_insn_sz;
3657 return r;
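/* Illustrative numbers (not part of the original source), derived from the
   function above: a non-SH2A SImode move gets 15 * 4 = 60, a QImode move
   15 * 1 = 15, and a DImode move 60 - (8 - 4) = 56 because it is split into
   two SImode accesses.  With consider_sh2a, the 12 bit displacement scales
   this to 15 * 4 * (4095 / 15) = 16380 for SImode.  */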
3661 /* Determine the alignment mask for a move insn of the
3662 specified mode. */
3663 static inline int
3664 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3666 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3667 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
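/* Illustrative note (not part of the original source): the mask is simply the
   move size minus one, e.g. 3 for SImode, 1 for HImode, 0 for QImode and 7
   for DFmode when a double-precision fmov is in use.  */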
3670 /* Return the displacement value of a displacement address. */
3671 HOST_WIDE_INT
3672 sh_disp_addr_displacement (rtx x)
3674 gcc_assert (satisfies_constraint_Sdd (x));
3675 return INTVAL (XEXP (XEXP (x, 0), 1));
3678 /* Compute the cost of an address. */
3679 static int
3680 sh_address_cost (rtx x, enum machine_mode mode,
3681 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3683 /* 'GBR + 0'. Account one more because of R0 restriction. */
3684 if (REG_P (x) && REGNO (x) == GBR_REG)
3685 return 2;
3687 /* Simple reg, post-inc, pre-dec addressing. */
3688 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3689 return 1;
3691 /* 'reg + disp' addressing. */
3692 if (GET_CODE (x) == PLUS
3693 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3695 /* 'GBR + disp'. Account one more because of R0 restriction. */
3696 if (REGNO (XEXP (x, 0)) == GBR_REG
3697 && gbr_displacement (XEXP (x, 1), mode))
3698 return 2;
3700 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3702 if (offset == 0)
3703 return 1;
3705 /* The displacement would fit into a 2 byte move insn.
3706 HImode and QImode loads/stores with displacement put pressure on
3707 R0 which will most likely require another reg copy. Thus account
3708 a higher cost for that. */
3709 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3710 return (mode == HImode || mode == QImode) ? 2 : 1;
3712 /* The displacement would fit into a 4 byte move insn (SH2A). */
3713 if (TARGET_SH2A
3714 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3715 return 2;
3717 /* The displacement is probably out of range and will require extra
3718 calculations. */
3719 return 3;
3722 /* 'reg + reg' addressing. Account a slightly higher cost because of
3723 increased pressure on R0. */
3724 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3725 && ! TARGET_SHMEDIA)
3726 return 3;
3728 /* Not sure what it is - probably expensive. */
3729 return 10;
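/* Illustrative cost examples (not part of the original source), following the
   rules above: @(r1), @(r1+) and @(-r1) cost 1; @(8,r1) for SImode costs 1,
   while @(4,r1) for QImode or HImode costs 2 because of the R0 restriction;
   @(r0,r1) costs 3; a plain GBR base or a GBR + disp address costs 2.  */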
3732 /* Code to expand a shift. */
3733 static void
3734 gen_ashift (int type, int n, rtx reg)
3736 rtx n_rtx;
3738 /* Negative values here come from the shift_amounts array. */
3739 if (n < 0)
3741 if (type == ASHIFT)
3742 type = LSHIFTRT;
3743 else
3744 type = ASHIFT;
3745 n = -n;
3748 n_rtx = GEN_INT (n);
3749 gcc_assert (satisfies_constraint_P27 (n_rtx));
3751 switch (type)
3753 case ASHIFTRT:
3754 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3755 break;
3756 case LSHIFTRT:
3757 if (n == 1)
3758 emit_insn (gen_shlr (reg, reg));
3759 else
3760 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3761 break;
3762 case ASHIFT:
3763 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3764 break;
3765 default:
3766 gcc_unreachable ();
3770 /* Code to expand a HImode shift. */
3771 static void
3772 gen_ashift_hi (int type, int n, rtx reg)
3774 /* Negative values here come from the shift_amounts array. */
3775 if (n < 0)
3777 if (type == ASHIFT)
3778 type = LSHIFTRT;
3779 else
3780 type = ASHIFT;
3781 n = -n;
3784 switch (type)
3786 case ASHIFTRT:
3787 case LSHIFTRT:
3788 /* We don't have HImode right shift operations because using the
3789 ordinary 32 bit shift instructions for that doesn't generate proper
3790 zero/sign extension.
3791 gen_ashift_hi is only called in contexts where we know that the
3792 sign extension works out correctly. */
3794 int offset = 0;
3795 if (GET_CODE (reg) == SUBREG)
3797 offset = SUBREG_BYTE (reg);
3798 reg = SUBREG_REG (reg);
3800 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3801 break;
3803 case ASHIFT:
3804 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3805 break;
3809 /* Output RTL to split a constant shift into its component SH constant
3810 shift instructions. */
3811 void
3812 gen_shifty_op (int code, rtx *operands)
3814 int value = INTVAL (operands[2]);
3815 int max, i;
3817 /* Truncate the shift count in case it is out of bounds. */
3818 value = value & 31;
3820 if (value == 31)
3822 if (code == LSHIFTRT)
3824 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3825 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3826 return;
3828 else if (code == ASHIFT)
3830 /* There is a two instruction sequence for 31 bit left shifts,
3831 but it requires r0. */
3832 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3834 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3835 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3836 return;
3840 else if (value == 0)
3842 /* This can happen even when optimizing, if there were subregs before
3843 reload. Don't output a nop here, as this is never optimized away;
3844 use a no-op move instead. */
3845 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3846 return;
3849 max = ashl_lshr_seq[value].insn_count;
3850 for (i = 0; i < max; i++)
3851 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3854 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3855 don't matter. */
3856 void
3857 gen_shifty_hi_op (int code, rtx *operands)
3859 int value = INTVAL (operands[2]);
3860 int max, i;
3861 void (*gen_fun) (int, int, rtx);
3863 /* This operation is used by and_shl for SImode values with a few
3864 high bits known to be cleared. */
3865 value &= 31;
3866 if (value == 0)
3868 emit_insn (gen_nop ());
3869 return;
3872 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3873 if (code == ASHIFT)
3875 max = ext_ashl_lshr_seq[value].insn_count;
3876 for (i = 0; i < max; i++)
3877 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3879 else
3880 /* When shifting right, emit the shifts in reverse order, so that
3881 solitary negative values come first. */
3882 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3883 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3886 /* Output RTL for an arithmetic right shift.
3887 ??? Rewrite to use super-optimizer sequences. */
3888 bool
3889 expand_ashiftrt (rtx *operands)
3891 rtx wrk;
3892 char func[18];
3893 int value;
3895 if (TARGET_DYNSHIFT)
3897 if (!CONST_INT_P (operands[2]))
3899 rtx count = copy_to_mode_reg (SImode, operands[2]);
3900 emit_insn (gen_negsi2 (count, count));
3901 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3902 return true;
3904 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3905 > 1 + SH_DYNAMIC_SHIFT_COST)
3907 rtx count
3908 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3909 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3910 return true;
3913 if (!CONST_INT_P (operands[2]))
3914 return false;
3916 value = INTVAL (operands[2]) & 31;
3918 if (value == 31)
3920 /* If we are called from abs expansion, arrange things so that we
3921 can use a single MT instruction that doesn't clobber the source,
3922 if LICM can hoist out the load of the constant zero. */
3923 if (currently_expanding_to_rtl)
3925 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3926 operands[1]));
3927 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3928 return true;
3930 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3931 return true;
3933 else if (value >= 16 && value <= 19)
3935 wrk = gen_reg_rtx (SImode);
3936 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3937 value -= 16;
3938 while (value--)
3939 gen_ashift (ASHIFTRT, 1, wrk);
3940 emit_move_insn (operands[0], wrk);
3941 return true;
3943 /* Expand a short sequence inline; for a longer one, call a magic routine. */
3944 else if (value <= 5)
3946 wrk = gen_reg_rtx (SImode);
3947 emit_move_insn (wrk, operands[1]);
3948 while (value--)
3949 gen_ashift (ASHIFTRT, 1, wrk);
3950 emit_move_insn (operands[0], wrk);
3951 return true;
3954 wrk = gen_reg_rtx (Pmode);
3956 /* Load the value into an arg reg and call a helper. */
3957 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3958 sprintf (func, "__ashiftrt_r4_%d", value);
3959 function_symbol (wrk, func, SFUNC_STATIC);
3960 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3961 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3962 return true;
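/* Illustrative note (not part of the original source): for a constant shift
   count such as 10 on a target without dynamic shifts, the fallback above
   moves the operand into r4, calls the library routine __ashiftrt_r4_10 and
   copies the result back from r4.  */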
3965 /* Try to find a good way to implement the combiner pattern
3966 [(set (match_operand:SI 0 "register_operand" "r")
3967 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3968 (match_operand:SI 2 "const_int_operand" "n"))
3969 (match_operand:SI 3 "const_int_operand" "n"))) .
3970 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3971 return 0 for simple right / left or left/right shift combination.
3972 return 1 for a combination of shifts with zero_extend.
3973 return 2 for a combination of shifts with an AND that needs r0.
3974 return 3 for a combination of shifts with an AND that needs an extra
3975 scratch register, when the three highmost bits of the AND mask are clear.
3976 return 4 for a combination of shifts with an AND that needs an extra
3977 scratch register, when any of the three highmost bits of the AND mask
3978 is set.
3979 If ATTRP is set, store an initial right shift width in ATTRP[0],
3980 and the instruction length in ATTRP[1]. These values are not valid
3981 when returning 0.
3982 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3983 shift_amounts for the last shift value that is to be used before the
3984 sign extend. */
3986 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3988 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3989 int left = INTVAL (left_rtx), right;
3990 int best = 0;
3991 int cost, best_cost = 10000;
3992 int best_right = 0, best_len = 0;
3993 int i;
3994 int can_ext;
3996 if (left < 0 || left > 31)
3997 return 0;
3998 if (CONST_INT_P (mask_rtx))
3999 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
4000 else
4001 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
4002 /* Can this be expressed as a right shift / left shift pair? */
4003 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
4004 right = exact_log2 (lsb);
4005 mask2 = ~(mask + lsb - 1);
4006 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4007 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
4008 if (! mask2)
4009 best_cost = ashl_lshr_seq[right].insn_count
4010 + ashl_lshr_seq[right + left].insn_count;
4011 /* mask has no trailing zeroes <==> ! right */
4012 else if (! right && mask2 == ~(lsb2 - 1))
4014 int late_right = exact_log2 (lsb2);
4015 best_cost = ashl_lshr_seq[left + late_right].insn_count
4016 + ashl_lshr_seq[late_right].insn_count;
4018 /* Try to use zero extend. */
4019 if (mask2 == ~(lsb2 - 1))
4021 int width, first;
4023 for (width = 8; width <= 16; width += 8)
4025 /* Can we zero-extend right away? */
4026 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4028 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4029 + ext_ashl_lshr_seq[left + right].insn_count;
4030 if (cost < best_cost)
4032 best = 1;
4033 best_cost = cost;
4034 best_right = right;
4035 best_len = cost;
4036 if (attrp)
4037 attrp[2] = -1;
4039 continue;
4041 /* ??? Could try to put zero extend into initial right shift,
4042 or even shift a bit left before the right shift. */
4043 /* Determine value of first part of left shift, to get to the
4044 zero extend cut-off point. */
4045 first = width - exact_log2 (lsb2) + right;
4046 if (first >= 0 && right + left - first >= 0)
4048 cost = ext_ashl_lshr_seq[right].insn_count
4049 + ext_ashl_lshr_seq[first].insn_count + 1
4050 + ext_ashl_lshr_seq[right + left - first].insn_count;
4052 if (cost < best_cost)
4054 best = 1;
4055 best_cost = cost;
4056 best_right = right;
4057 best_len = cost;
4058 if (attrp)
4059 attrp[2] = first;
4064 /* Try to use r0 AND pattern */
4065 for (i = 0; i <= 2; i++)
4067 if (i > right)
4068 break;
4069 if (! CONST_OK_FOR_K08 (mask >> i))
4070 continue;
4071 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4072 if (cost < best_cost)
4074 best = 2;
4075 best_cost = cost;
4076 best_right = i;
4077 best_len = cost - 1;
4080 /* Try to use a scratch register to hold the AND operand. */
4081 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4082 for (i = 0; i <= 2; i++)
4084 if (i > right)
4085 break;
4086 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4087 + (can_ext
4088 ? ext_ashl_lshr_seq
4089 : ashl_lshr_seq)[left + i].insn_count;
4090 if (cost < best_cost)
4092 best = 4 - can_ext;
4093 best_cost = cost;
4094 best_right = i;
4095 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4099 if (attrp)
4101 attrp[0] = best_right;
4102 attrp[1] = best_len;
4104 return best;
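/* Illustrative example (not part of the original source): LEFT_RTX == 2 and
   MASK_RTX == 0x3fc describe the C expression (src << 2) & 0x3fc.  The mask
   shifted right by LEFT is 0xff, so the loop above will usually prefer the
   shift + zero-extend combination (return value 1) over a plain left/right
   shift pair, depending on the insn counts in the sequence tables.  */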
4107 /* This is used in length attributes of the unnamed instructions
4108 corresponding to shl_and_kind return values of 1 and 2. */
4110 shl_and_length (rtx insn)
4112 rtx set_src, left_rtx, mask_rtx;
4113 int attributes[3];
4115 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4116 left_rtx = XEXP (XEXP (set_src, 0), 1);
4117 mask_rtx = XEXP (set_src, 1);
4118 shl_and_kind (left_rtx, mask_rtx, attributes);
4119 return attributes[1];
4122 /* This is used in length attribute of the and_shl_scratch instruction. */
4124 shl_and_scr_length (rtx insn)
4126 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4127 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4128 rtx op = XEXP (set_src, 0);
4129 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4130 op = XEXP (XEXP (op, 0), 0);
4131 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4134 /* Generate rtl for instructions for which shl_and_kind advised a particular
4135 method of generating them, i.e. returned zero. */
4136 bool
4137 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4139 int attributes[3];
4140 unsigned HOST_WIDE_INT mask;
4141 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4142 int right, total_shift;
4143 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4145 right = attributes[0];
4146 total_shift = INTVAL (left_rtx) + right;
4147 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4148 switch (kind)
4150 default:
4151 return true;
4152 case 1:
4154 int first = attributes[2];
4155 rtx operands[3];
4157 if (first < 0)
4159 emit_insn ((mask << right) <= 0xff
4160 ? gen_zero_extendqisi2 (dest,
4161 gen_lowpart (QImode, source))
4162 : gen_zero_extendhisi2 (dest,
4163 gen_lowpart (HImode, source)));
4164 source = dest;
4166 if (source != dest)
4167 emit_insn (gen_movsi (dest, source));
4168 operands[0] = dest;
4169 if (right)
4171 operands[2] = GEN_INT (right);
4172 gen_shifty_hi_op (LSHIFTRT, operands);
4174 if (first > 0)
4176 operands[2] = GEN_INT (first);
4177 gen_shifty_hi_op (ASHIFT, operands);
4178 total_shift -= first;
4179 mask <<= first;
4181 if (first >= 0)
4182 emit_insn (mask <= 0xff
4183 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4184 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4185 if (total_shift > 0)
4187 operands[2] = GEN_INT (total_shift);
4188 gen_shifty_hi_op (ASHIFT, operands);
4190 break;
4192 case 4:
4193 shift_gen_fun = gen_shifty_op;
4194 case 3:
4195 /* If the topmost bit that matters is set, set the topmost bits
4196 that don't matter. This way, we might be able to get a shorter
4197 signed constant. */
4198 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4199 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4200 case 2:
4201 /* Don't expand fine-grained when combining, because that will
4202 make the pattern fail. */
4203 if (currently_expanding_to_rtl
4204 || reload_in_progress || reload_completed)
4206 rtx operands[3];
4208 /* Cases 3 and 4 should be handled by this split
4209 only while combining */
4210 gcc_assert (kind <= 2);
4211 if (right)
4213 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4214 source = dest;
4216 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4217 if (total_shift)
4219 operands[0] = dest;
4220 operands[1] = dest;
4221 operands[2] = GEN_INT (total_shift);
4222 shift_gen_fun (ASHIFT, operands);
4224 break;
4226 else
4228 int neg = 0;
4229 if (kind != 4 && total_shift < 16)
4231 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4232 if (neg > 0)
4233 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4234 else
4235 neg = 0;
4237 emit_insn (gen_and_shl_scratch (dest, source,
4238 GEN_INT (right),
4239 GEN_INT (mask),
4240 GEN_INT (total_shift + neg),
4241 GEN_INT (neg)));
4242 emit_insn (gen_movsi (dest, dest));
4243 break;
4246 return false;
4249 /* Try to find a good way to implement the combiner pattern
4250 [(set (match_operand:SI 0 "register_operand" "=r")
4251 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4252 (match_operand:SI 2 "const_int_operand" "n")
4253 (match_operand:SI 3 "const_int_operand" "n")
4254 (const_int 0)))
4255 (clobber (reg:SI T_REG))]
4256 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4257 return 0 for simple left / right shift combination.
4258 return 1 for left shift / 8 bit sign extend / left shift.
4259 return 2 for left shift / 16 bit sign extend / left shift.
4260 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4261 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4262 return 5 for left shift / 16 bit sign extend / right shift
4263 return 6 for < 8 bit sign extend / left shift.
4264 return 7 for < 8 bit sign extend / left shift / single right shift.
4265 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4267 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4269 int left, size, insize, ext;
4270 int cost = 0, best_cost;
4271 int kind;
4273 left = INTVAL (left_rtx);
4274 size = INTVAL (size_rtx);
4275 insize = size - left;
4276 gcc_assert (insize > 0);
4277 /* Default to left / right shift. */
4278 kind = 0;
4279 best_cost = ashl_lshr_seq[32 - insize].insn_count
4280 + ashl_lshr_seq[32 - size].insn_count;
4281 if (size <= 16)
4283 /* 16 bit shift / sign extend / 16 bit shift */
4284 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4285 + ashl_lshr_seq[16 - size].insn_count;
4286 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4287 below, by alternative 3 or something even better. */
4288 if (cost < best_cost)
4290 kind = 5;
4291 best_cost = cost;
4294 /* Try a plain sign extend between two shifts. */
4295 for (ext = 16; ext >= insize; ext -= 8)
4297 if (ext <= size)
4299 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4300 + ashl_lshr_seq[size - ext].insn_count;
4301 if (cost < best_cost)
4303 kind = ext / (unsigned) 8;
4304 best_cost = cost;
4307 /* Check if we can do a sloppy shift with a final signed shift
4308 restoring the sign. */
4309 if (EXT_SHIFT_SIGNED (size - ext))
4310 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4311 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4312 /* If not, maybe it's still cheaper to do the second shift sloppy,
4313 and do a final sign extend? */
4314 else if (size <= 16)
4315 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4316 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4317 + 1;
4318 else
4319 continue;
4320 if (cost < best_cost)
4322 kind = ext / (unsigned) 8 + 2;
4323 best_cost = cost;
4326 /* Check if we can sign extend in r0 */
4327 if (insize < 8)
4329 cost = 3 + ashl_lshr_seq[left].insn_count;
4330 if (cost < best_cost)
4332 kind = 6;
4333 best_cost = cost;
4335 /* Try the same with a final signed shift. */
4336 if (left < 31)
4338 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4339 if (cost < best_cost)
4341 kind = 7;
4342 best_cost = cost;
4346 if (TARGET_DYNSHIFT)
4348 /* Try to use a dynamic shift. */
4349 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4350 if (cost < best_cost)
4352 kind = 0;
4353 best_cost = cost;
4356 if (costp)
4357 *costp = cost;
4358 return kind;
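/* Illustrative note (not part of the original source): the sign_extract
   pattern handled here is roughly the C expression
     dst = ((int) ((src << left) << (32 - size))) >> (32 - size);
   i.e. the low SIZE bits of (src << LEFT), sign extended.  The default
   kind 0 implements exactly that as a left shift by 32 - insize followed by
   an arithmetic right shift by 32 - size.  */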
4361 /* Function to be used in the length attribute of the instructions
4362 implementing this pattern. */
4364 shl_sext_length (rtx insn)
4366 rtx set_src, left_rtx, size_rtx;
4367 int cost;
4369 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4370 left_rtx = XEXP (XEXP (set_src, 0), 1);
4371 size_rtx = XEXP (set_src, 1);
4372 shl_sext_kind (left_rtx, size_rtx, &cost);
4373 return cost;
4376 /* Generate rtl for this pattern */
4377 bool
4378 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4380 int kind;
4381 int left, size, insize, cost;
4382 rtx operands[3];
4384 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4385 left = INTVAL (left_rtx);
4386 size = INTVAL (size_rtx);
4387 insize = size - left;
4388 switch (kind)
4390 case 1:
4391 case 2:
4392 case 3:
4393 case 4:
4395 int ext = kind & 1 ? 8 : 16;
4396 int shift2 = size - ext;
4398 /* Don't expand fine-grained when combining, because that will
4399 make the pattern fail. */
4400 if (! currently_expanding_to_rtl
4401 && ! reload_in_progress && ! reload_completed)
4403 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4404 emit_insn (gen_movsi (dest, source));
4405 break;
4407 if (dest != source)
4408 emit_insn (gen_movsi (dest, source));
4409 operands[0] = dest;
4410 if (ext - insize)
4412 operands[2] = GEN_INT (ext - insize);
4413 gen_shifty_hi_op (ASHIFT, operands);
4415 emit_insn (kind & 1
4416 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4417 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4418 if (kind <= 2)
4420 if (shift2)
4422 operands[2] = GEN_INT (shift2);
4423 gen_shifty_op (ASHIFT, operands);
4426 else
4428 if (shift2 > 0)
4430 if (EXT_SHIFT_SIGNED (shift2))
4432 operands[2] = GEN_INT (shift2 + 1);
4433 gen_shifty_op (ASHIFT, operands);
4434 operands[2] = const1_rtx;
4435 gen_shifty_op (ASHIFTRT, operands);
4436 break;
4438 operands[2] = GEN_INT (shift2);
4439 gen_shifty_hi_op (ASHIFT, operands);
4441 else if (shift2)
4443 operands[2] = GEN_INT (-shift2);
4444 gen_shifty_hi_op (LSHIFTRT, operands);
4446 emit_insn (size <= 8
4447 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4448 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4450 break;
4452 case 5:
4454 int i = 16 - size;
4455 if (! currently_expanding_to_rtl
4456 && ! reload_in_progress && ! reload_completed)
4457 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4458 else
4460 operands[0] = dest;
4461 operands[2] = GEN_INT (16 - insize);
4462 gen_shifty_hi_op (ASHIFT, operands);
4463 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4465 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4466 while (--i >= 0)
4467 gen_ashift (ASHIFTRT, 1, dest);
4468 break;
4470 case 6:
4471 case 7:
4472 /* Don't expand fine-grained when combining, because that will
4473 make the pattern fail. */
4474 if (! currently_expanding_to_rtl
4475 && ! reload_in_progress && ! reload_completed)
4477 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4478 emit_insn (gen_movsi (dest, source));
4479 break;
4481 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4482 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4483 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4484 operands[0] = dest;
4485 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4486 gen_shifty_op (ASHIFT, operands);
4487 if (kind == 7)
4488 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4489 break;
4490 default:
4491 return true;
4493 return false;
4496 /* Prefix a symbol_ref name with "datalabel". */
4498 gen_datalabel_ref (rtx sym)
4500 const char *str;
4502 if (GET_CODE (sym) == LABEL_REF)
4503 return gen_rtx_CONST (GET_MODE (sym),
4504 gen_rtx_UNSPEC (GET_MODE (sym),
4505 gen_rtvec (1, sym),
4506 UNSPEC_DATALABEL));
4508 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4510 str = XSTR (sym, 0);
4511 /* Share all SYMBOL_REF strings with the same value - that is important
4512 for cse. */
4513 str = IDENTIFIER_POINTER (get_identifier (str));
4514 XSTR (sym, 0) = str;
4516 return sym;
4520 static alloc_pool label_ref_list_pool;
4522 typedef struct label_ref_list_d
4524 rtx label;
4525 struct label_ref_list_d *next;
4526 } *label_ref_list_t;
4528 /* The SH cannot load a large constant into a register; constants have to
4529 come from a pc relative load. The reference of a pc relative load
4530 instruction must be less than 1k in front of the instruction. This
4531 means that we often have to dump a constant inside a function, and
4532 generate code to branch around it.
4534 It is important to minimize this, since the branches will slow things
4535 down and make things bigger.
4537 Worst case code looks like:
4539 mov.l L1,rn
4540 bra L2
4542 align
4543 L1: .long value
4547 mov.l L3,rn
4548 bra L4
4550 align
4551 L3: .long value
4555 We fix this by performing a scan before scheduling, which notices which
4556 instructions need to have their operands fetched from the constant table
4557 and builds the table.
4559 The algorithm is:
4561 scan, find an instruction which needs a pcrel move. Look forward, find the
4562 last barrier which is within MAX_COUNT bytes of the requirement.
4563 If there isn't one, make one. Process all the instructions between
4564 the find and the barrier.
4566 In the above example, we can tell that L3 is within 1k of L1, so
4567 the first move can be shrunk from the 3 insn+constant sequence into
4568 just 1 insn, and the constant moved to L3 to make:
4570 mov.l L1,rn
4572 mov.l L3,rn
4573 bra L4
4575 align
4576 L3:.long value
4577 L4:.long value
4579 Then the second move becomes the target for the shortening process. */
4581 typedef struct
4583 rtx value; /* Value in table. */
4584 rtx label; /* Label of value. */
4585 label_ref_list_t wend; /* End of window. */
4586 enum machine_mode mode; /* Mode of value. */
4588 /* True if this constant is accessed as part of a post-increment
4589 sequence. Note that HImode constants are never accessed in this way. */
4590 bool part_of_sequence_p;
4591 } pool_node;
4593 /* The maximum number of constants that can fit into one pool, since
4594 constants in the range 0..510 are at least 2 bytes long, and in the
4595 range from there to 1018 at least 4 bytes. */
4597 #define MAX_POOL_SIZE 372
4598 static pool_node pool_vector[MAX_POOL_SIZE];
4599 static int pool_size;
4600 static rtx pool_window_label;
4601 static int pool_window_last;
4603 static int max_labelno_before_reorg;
4605 /* ??? If we need a constant in HImode which is the truncated value of a
4606 constant we need in SImode, we could combine the two entries thus saving
4607 two bytes. Is this common enough to be worth the effort of implementing
4608 it? */
4610 /* ??? This stuff should be done at the same time that we shorten branches.
4611 As it is now, we must assume that all branches are the maximum size, and
4612 this causes us to almost always output constant pools sooner than
4613 necessary. */
4615 /* Add a constant to the pool and return its label. */
4616 static rtx
4617 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4619 int i;
4620 rtx lab, new_rtx;
4621 label_ref_list_t ref, newref;
4623 /* First see if we've already got it. */
4624 for (i = 0; i < pool_size; i++)
4626 if (x->code == pool_vector[i].value->code
4627 && mode == pool_vector[i].mode)
4629 if (x->code == CODE_LABEL)
4631 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4632 continue;
4634 if (rtx_equal_p (x, pool_vector[i].value))
4636 lab = new_rtx = 0;
4637 if (! last_value
4638 || ! i
4639 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4641 new_rtx = gen_label_rtx ();
4642 LABEL_REFS (new_rtx) = pool_vector[i].label;
4643 pool_vector[i].label = lab = new_rtx;
4645 if (lab && pool_window_label)
4647 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4648 newref->label = pool_window_label;
4649 ref = pool_vector[pool_window_last].wend;
4650 newref->next = ref;
4651 pool_vector[pool_window_last].wend = newref;
4653 if (new_rtx)
4654 pool_window_label = new_rtx;
4655 pool_window_last = i;
4656 return lab;
4661 /* Need a new one. */
4662 pool_vector[pool_size].value = x;
4663 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4665 lab = 0;
4666 pool_vector[pool_size - 1].part_of_sequence_p = true;
4668 else
4669 lab = gen_label_rtx ();
4670 pool_vector[pool_size].mode = mode;
4671 pool_vector[pool_size].label = lab;
4672 pool_vector[pool_size].wend = NULL;
4673 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4674 if (lab && pool_window_label)
4676 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4677 newref->label = pool_window_label;
4678 ref = pool_vector[pool_window_last].wend;
4679 newref->next = ref;
4680 pool_vector[pool_window_last].wend = newref;
4682 if (lab)
4683 pool_window_label = lab;
4684 pool_window_last = pool_size;
4685 pool_size++;
4686 return lab;
4689 /* Output the literal table. START, if nonzero, is the first instruction
4690 this table is needed for, and also indicates that there is at least one
4691 casesi_worker_2 instruction; we have to emit the operand3 labels from
4692 these insns at a 4-byte aligned position. BARRIER is the barrier
4693 after which we are to place the table. */
4694 static void
4695 dump_table (rtx start, rtx barrier)
4697 rtx scan = barrier;
4698 int i;
4699 bool need_align = true;
4700 rtx lab;
4701 label_ref_list_t ref;
4702 bool have_df = false;
4704 /* Do two passes, first time dump out the HI sized constants. */
4706 for (i = 0; i < pool_size; i++)
4708 pool_node *p = &pool_vector[i];
4710 if (p->mode == HImode)
4712 if (need_align)
4714 scan = emit_insn_after (gen_align_2 (), scan);
4715 need_align = false;
4717 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4718 scan = emit_label_after (lab, scan);
4719 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4720 scan);
4721 for (ref = p->wend; ref; ref = ref->next)
4723 lab = ref->label;
4724 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4727 else if (p->mode == DFmode)
4728 have_df = true;
4731 need_align = true;
4733 if (start)
4735 scan = emit_insn_after (gen_align_4 (), scan);
4736 need_align = false;
4737 for (; start != barrier; start = NEXT_INSN (start))
4738 if (NONJUMP_INSN_P (start)
4739 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4741 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4742 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4744 scan = emit_label_after (lab, scan);
4747 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4749 rtx align_insn = NULL_RTX;
4751 scan = emit_label_after (gen_label_rtx (), scan);
4752 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4753 need_align = false;
4755 for (i = 0; i < pool_size; i++)
4757 pool_node *p = &pool_vector[i];
4759 switch (p->mode)
4761 case HImode:
4762 break;
4763 case SImode:
4764 case SFmode:
4765 if (align_insn && !p->part_of_sequence_p)
4767 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4768 emit_label_before (lab, align_insn);
4769 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4770 align_insn);
4771 for (ref = p->wend; ref; ref = ref->next)
4773 lab = ref->label;
4774 emit_insn_before (gen_consttable_window_end (lab),
4775 align_insn);
4777 delete_insn (align_insn);
4778 align_insn = NULL_RTX;
4779 continue;
4781 else
4783 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4784 scan = emit_label_after (lab, scan);
4785 scan = emit_insn_after (gen_consttable_4 (p->value,
4786 const0_rtx), scan);
4787 need_align = ! need_align;
4789 break;
4790 case DFmode:
4791 if (need_align)
4793 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4794 align_insn = scan;
4795 need_align = false;
4797 case DImode:
4798 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4799 scan = emit_label_after (lab, scan);
4800 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4801 scan);
4802 break;
4803 default:
4804 gcc_unreachable ();
4807 if (p->mode != HImode)
4809 for (ref = p->wend; ref; ref = ref->next)
4811 lab = ref->label;
4812 scan = emit_insn_after (gen_consttable_window_end (lab),
4813 scan);
4818 pool_size = 0;
4821 for (i = 0; i < pool_size; i++)
4823 pool_node *p = &pool_vector[i];
4825 switch (p->mode)
4827 case HImode:
4828 break;
4829 case SImode:
4830 case SFmode:
4831 if (need_align)
4833 need_align = false;
4834 scan = emit_label_after (gen_label_rtx (), scan);
4835 scan = emit_insn_after (gen_align_4 (), scan);
4837 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4838 scan = emit_label_after (lab, scan);
4839 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4840 scan);
4841 break;
4842 case DFmode:
4843 case DImode:
4844 if (need_align)
4846 need_align = false;
4847 scan = emit_label_after (gen_label_rtx (), scan);
4848 scan = emit_insn_after (gen_align_4 (), scan);
4850 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4851 scan = emit_label_after (lab, scan);
4852 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4853 scan);
4854 break;
4855 default:
4856 gcc_unreachable ();
4859 if (p->mode != HImode)
4861 for (ref = p->wend; ref; ref = ref->next)
4863 lab = ref->label;
4864 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4869 scan = emit_insn_after (gen_consttable_end (), scan);
4870 scan = emit_barrier_after (scan);
4871 pool_size = 0;
4872 pool_window_label = NULL_RTX;
4873 pool_window_last = 0;
4876 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4878 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4880 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4881 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4882 need to fix it if the input value is CONST_OK_FOR_I08. */
4883 static bool
4884 broken_move (rtx insn)
4886 if (NONJUMP_INSN_P (insn))
4888 rtx pat = PATTERN (insn);
4889 if (GET_CODE (pat) == PARALLEL)
4890 pat = XVECEXP (pat, 0, 0);
4891 if (GET_CODE (pat) == SET
4892 /* We can load any 8-bit value if we don't care what the high
4893 order bits end up as. */
4894 && GET_MODE (SET_DEST (pat)) != QImode
4895 && (CONSTANT_P (SET_SRC (pat))
4896 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4897 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4898 /* Match mova_const. */
4899 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4900 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4901 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4902 && ! (TARGET_SH2E
4903 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4904 && (fp_zero_operand (SET_SRC (pat))
4905 || fp_one_operand (SET_SRC (pat)))
4906 /* In general we don't know the current setting of fpscr, so
4907 disable fldi.
4908 There is an exception if this was a register-register move
4909 before reload - and hence it was ascertained that we have
4910 single precision setting - and in a post-reload optimization
4911 we changed this to do a constant load. In that case
4912 we don't have an r0 clobber, hence we must use fldi. */
4913 && (TARGET_FMOVD
4914 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4915 == SCRATCH))
4916 && REG_P (SET_DEST (pat))
4917 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4918 && ! (TARGET_SH2A
4919 && GET_MODE (SET_DEST (pat)) == SImode
4920 && (satisfies_constraint_I20 (SET_SRC (pat))
4921 || satisfies_constraint_I28 (SET_SRC (pat))))
4922 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4923 return true;
4926 return false;
4929 /* Return true if the specified insn is a mova insn. */
4930 static bool
4931 mova_p (rtx insn)
4933 return (NONJUMP_INSN_P (insn)
4934 && GET_CODE (PATTERN (insn)) == SET
4935 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4936 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4937 /* Don't match mova_const. */
4938 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4941 /* Fix up a mova from a switch that went out of range. */
4942 static void
4943 fixup_mova (rtx mova)
4945 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4946 if (! flag_pic)
4948 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4949 INSN_CODE (mova) = -1;
4951 else
4953 rtx worker = mova;
4954 rtx lab = gen_label_rtx ();
4955 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4959 worker = NEXT_INSN (worker);
4960 gcc_assert (worker
4961 && !LABEL_P (worker)
4962 && !JUMP_P (worker));
4963 } while (NOTE_P (worker)
4964 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4965 wpat = PATTERN (worker);
4966 wpat0 = XVECEXP (wpat, 0, 0);
4967 wpat1 = XVECEXP (wpat, 0, 1);
4968 wsrc = SET_SRC (wpat0);
4969 PATTERN (worker) = (gen_casesi_worker_2
4970 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4971 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4972 XEXP (wpat1, 0)));
4973 INSN_CODE (worker) = -1;
4974 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4975 base = gen_rtx_LABEL_REF (Pmode, lab);
4976 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4977 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4978 INSN_CODE (mova) = -1;
4982 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4983 *num_mova, and check if the new mova is not nested within the first one.
4984 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4985 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4986 static int
4987 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4989 int n_addr = 0; /* Initialization to shut up spurious warning. */
4990 int f_target, n_target = 0; /* Likewise. */
4992 if (optimize)
4994 /* If NEW_MOVA has no address yet, it will be handled later. */
4995 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4996 return -1;
4998 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4999 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
5000 if (n_addr > n_target || n_addr + 1022 < n_target)
5002 /* Change the mova into a load.
5003 broken_move will then return true for it. */
5004 fixup_mova (new_mova);
5005 return 1;
5008 if (!(*num_mova)++)
5010 *first_mova = new_mova;
5011 return 2;
5013 if (!optimize
5014 || ((f_target
5015 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5016 >= n_target))
5017 return -1;
5019 (*num_mova)--;
5020 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5021 > n_target - n_addr)
5023 fixup_mova (*first_mova);
5024 return 0;
5026 else
5028 fixup_mova (new_mova);
5029 return 1;
5033 /* Find the last barrier from insn FROM which is close enough to hold the
5034 constant pool. If we can't find one, then create one near the end of
5035 the range. */
5036 static rtx
5037 find_barrier (int num_mova, rtx mova, rtx from)
5039 int count_si = 0;
5040 int count_hi = 0;
5041 int found_hi = 0;
5042 int found_si = 0;
5043 int found_di = 0;
5044 int hi_align = 2;
5045 int si_align = 2;
5046 int leading_mova = num_mova;
5047 rtx barrier_before_mova = NULL_RTX;
5048 rtx found_barrier = NULL_RTX;
5049 rtx good_barrier = NULL_RTX;
5050 int si_limit;
5051 int hi_limit;
5052 rtx orig = from;
5053 rtx last_got = NULL_RTX;
5054 rtx last_symoff = NULL_RTX;
5056 /* For HImode: range is 510, add 4 because pc counts from address of
5057 second instruction after this one, subtract 2 for the jump instruction
5058 that we may need to emit before the table, subtract 2 for the instruction
5059 that fills the jump delay slot (in very rare cases, reorg will take an
5060 instruction from after the constant pool or will leave the delay slot
5061 empty). This gives 510.
5062 For SImode: range is 1020, add 4 because pc counts from address of
5063 second instruction after this one, subtract 2 in case pc is 2 byte
5064 aligned, subtract 2 for the jump instruction that we may need to emit
5065 before the table, subtract 2 for the instruction that fills the jump
5066 delay slot. This gives 1018. */
5068 /* The branch will always be shortened now that the reference address for
5069 forward branches is the successor address, thus we need no longer make
5070 adjustments to the [sh]i_limit for -O0. */
5072 si_limit = 1018;
5073 hi_limit = 510;
5075 while (from && count_si < si_limit && count_hi < hi_limit)
5077 int inc = get_attr_length (from);
5078 int new_align = 1;
5080 /* If this is a label that existed at the time of the compute_alignments
5081 call, determine the alignment. N.B. When find_barrier recurses for
5082 an out-of-reach mova, we might see labels at the start of previously
5083 inserted constant tables. */
5084 if (LABEL_P (from)
5085 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5087 if (optimize)
5088 new_align = 1 << label_to_alignment (from);
5089 else if (BARRIER_P (prev_nonnote_insn (from)))
5090 new_align = 1 << barrier_align (from);
5091 else
5092 new_align = 1;
5093 inc = 0;
5095 /* In case we are scanning a constant table because of recursion, check
5096 for explicit alignments. If the table is long, we might be forced
5097 to emit the new table in front of it; the length of the alignment
5098 might be the last straw. */
5099 else if (NONJUMP_INSN_P (from)
5100 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5101 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5102 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5103 /* When we find the end of a constant table, paste the new constant
5104 at the end. That is better than putting it in front because
5105 this way, we don't need extra alignment for adding a 4-byte-aligned
5106 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5107 else if (NONJUMP_INSN_P (from)
5108 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5109 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5110 return from;
5112 if (BARRIER_P (from))
5114 rtx next;
5116 found_barrier = from;
5118 /* If we are at the end of the function, or in front of an alignment
5119 instruction, we need not insert an extra alignment. We prefer
5120 this kind of barrier. */
5121 if (barrier_align (from) > 2)
5122 good_barrier = from;
5124 /* If we are at the end of a hot/cold block, dump the constants
5125 here. */
5126 next = NEXT_INSN (from);
5127 if (next
5128 && NOTE_P (next)
5129 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5130 break;
5133 if (broken_move (from))
5135 rtx pat, src, dst;
5136 enum machine_mode mode;
5138 pat = PATTERN (from);
5139 if (GET_CODE (pat) == PARALLEL)
5140 pat = XVECEXP (pat, 0, 0);
5141 src = SET_SRC (pat);
5142 dst = SET_DEST (pat);
5143 mode = GET_MODE (dst);
5145 /* GOT pc-relative setting comes in a pair of
5146 mova .L8,r0
5147 mov.l .L8,r12
5148 instructions. (plus add r0,r12).
5149 Remember if we see one without the other. */
5150 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5151 last_got = last_got ? NULL_RTX : from;
5152 else if (PIC_ADDR_P (src))
5153 last_got = last_got ? NULL_RTX : from;
5155 /* We must explicitly check the mode, because sometimes the
5156 front end will generate code to load unsigned constants into
5157 HImode targets without properly sign extending them. */
5158 if (mode == HImode
5159 || (mode == SImode && satisfies_constraint_I16 (src)
5160 && REGNO (dst) != FPUL_REG))
5162 found_hi += 2;
5163 /* We put the short constants before the long constants, so
5164 we must count the length of short constants in the range
5165 for the long constants. */
5166 /* ??? This isn't optimal, but is easy to do. */
5167 si_limit -= 2;
5169 else
5171 /* We dump DF/DI constants before SF/SI ones, because
5172 the limit is the same, but the alignment requirements
5173 are higher. We may waste up to 4 additional bytes
5174 for alignment, and the DF/DI constant may have
5175 another SF/SI constant placed before it. */
5176 if (TARGET_SHCOMPACT
5177 && ! found_di
5178 && (mode == DFmode || mode == DImode))
5180 found_di = 1;
5181 si_limit -= 8;
5183 while (si_align > 2 && found_si + si_align - 2 > count_si)
5184 si_align >>= 1;
5185 if (found_si > count_si)
5186 count_si = found_si;
5187 found_si += GET_MODE_SIZE (mode);
5188 if (num_mova)
5189 si_limit -= GET_MODE_SIZE (mode);
5193 if (mova_p (from))
5195 switch (untangle_mova (&num_mova, &mova, from))
5197 case 1:
5198 if (flag_pic)
5200 rtx src = SET_SRC (PATTERN (from));
5201 if (GET_CODE (src) == CONST
5202 && GET_CODE (XEXP (src, 0)) == UNSPEC
5203 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5204 last_symoff = from;
5206 break;
5207 case 0: return find_barrier (0, 0, mova);
5208 case 2:
5210 leading_mova = 0;
5211 barrier_before_mova
5212 = good_barrier ? good_barrier : found_barrier;
5214 default: break;
5216 if (found_si > count_si)
5217 count_si = found_si;
5219 else if (JUMP_TABLE_DATA_P (from))
5221 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5222 || (num_mova
5223 && (prev_nonnote_insn (from)
5224 == XEXP (MOVA_LABELREF (mova), 0))))
5225 num_mova--;
5226 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5228 /* We have just passed the barrier in front of the
5229 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5230 the ADDR_DIFF_VEC is accessed as data, just like our pool
5231 constants, this is a good opportunity to accommodate what
5232 we have gathered so far.
5233 If we waited any longer, we could end up at a barrier in
5234 front of code, which gives worse cache usage for separated
5235 instruction / data caches. */
5236 good_barrier = found_barrier;
5237 break;
5239 else
5241 rtx body = PATTERN (from);
5242 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5245 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5246 else if (JUMP_P (from)
5247 && ! TARGET_SH2
5248 && ! optimize_size)
5249 new_align = 4;
5251 /* There is a possibility that a bf is transformed into a bf/s by the
5252 delay slot scheduler. */
5253 if (JUMP_P (from) && !JUMP_TABLE_DATA_P (from)
5254 && get_attr_type (from) == TYPE_CBRANCH
5255 && ! sequence_insn_p (from))
5256 inc += 2;
5258 if (found_si)
5260 count_si += inc;
5261 if (new_align > si_align)
5263 si_limit -= (count_si - 1) & (new_align - si_align);
5264 si_align = new_align;
5266 count_si = (count_si + new_align - 1) & -new_align;
5268 if (found_hi)
5270 count_hi += inc;
5271 if (new_align > hi_align)
5273 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5274 hi_align = new_align;
5276 count_hi = (count_hi + new_align - 1) & -new_align;
5278 from = NEXT_INSN (from);
5281 if (num_mova)
5283 if (leading_mova)
5285 /* Try as we might, the leading mova is out of range. Change
5286 it into a load (which will become a pcload) and retry. */
5287 fixup_mova (mova);
5288 return find_barrier (0, 0, mova);
5290 else
5292 /* Insert the constant pool table before the mova instruction,
5293 to prevent the mova label reference from going out of range. */
5294 from = mova;
5295 good_barrier = found_barrier = barrier_before_mova;
5299 if (found_barrier)
5301 if (good_barrier && next_real_insn (found_barrier))
5302 found_barrier = good_barrier;
5304 else
5306 /* We didn't find a barrier in time to dump our stuff,
5307 so we'll make one. */
5308 rtx label = gen_label_rtx ();
5310 /* Don't emit a constant table in the middle of insns for
5311 casesi_worker_2. This is a bit overkill but is enough
5312 because casesi_worker_2 wouldn't appear so frequently. */
5313 if (last_symoff)
5314 from = last_symoff;
5316 /* If we exceeded the range, then we must back up over the last
5317 instruction we looked at. Otherwise, we just need to undo the
5318 NEXT_INSN at the end of the loop. */
5319 if (PREV_INSN (from) != orig
5320 && (count_hi > hi_limit || count_si > si_limit))
5321 from = PREV_INSN (PREV_INSN (from));
5322 else
5323 from = PREV_INSN (from);
5325 /* Don't emit a constant table in the middle of global pointer setting,
5326 since that would move the addressing base GOT into another table.
5327 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5328 in the pool anyway, so just move up the whole constant pool.
5330 However, avoid doing so when the last single GOT mov is the starting
5331 insn itself. Going above the start insn would create a negative
5332 offset, causing errors. */
5333 if (last_got && last_got != orig)
5334 from = PREV_INSN (last_got);
5336 /* Don't insert the constant pool table at the position which
5337 may be the landing pad. */
5338 if (flag_exceptions
5339 && CALL_P (from)
5340 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5341 from = PREV_INSN (from);
5343 /* Walk back to be just before any jump or label.
5344 Putting it before a label reduces the number of times the branch
5345 around the constant pool table will be hit. Putting it before
5346 a jump makes it more likely that the bra delay slot will be
5347 filled. */
5348 while (NOTE_P (from) || JUMP_P (from)
5349 || LABEL_P (from))
5350 from = PREV_INSN (from);
5352 /* Make sure we do not split between a call and its corresponding
5353 CALL_ARG_LOCATION note. */
5354 if (CALL_P (from))
5356 rtx next = NEXT_INSN (from);
5357 if (next && NOTE_P (next)
5358 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5359 from = next;
5362 from = emit_jump_insn_after (gen_jump (label), from);
5363 JUMP_LABEL (from) = label;
5364 LABEL_NUSES (label) = 1;
5365 found_barrier = emit_barrier_after (from);
5366 emit_label_after (label, found_barrier);
5369 return found_barrier;
5372 /* If the instruction INSN is implemented by a special function, and we can
5373 positively find the register that is used to call the sfunc, and this
5374 register is not used anywhere else in this instruction - except as the
5375 destination of a set, return this register; else, return 0. */
5377 sfunc_uses_reg (rtx insn)
5379 int i;
5380 rtx pattern, part, reg_part, reg;
5382 if (!NONJUMP_INSN_P (insn))
5383 return NULL_RTX;
5384 pattern = PATTERN (insn);
5385 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5386 return NULL_RTX;
5388 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5390 part = XVECEXP (pattern, 0, i);
5391 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5392 reg_part = part;
5394 if (! reg_part)
5395 return NULL_RTX;
5396 reg = XEXP (reg_part, 0);
5397 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5399 part = XVECEXP (pattern, 0, i);
5400 if (part == reg_part || GET_CODE (part) == CLOBBER)
5401 continue;
5402 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5403 && REG_P (SET_DEST (part)))
5404 ? SET_SRC (part) : part)))
5405 return NULL_RTX;
5407 return reg;
5410 /* See if the only way in which INSN uses REG is by calling it, or by
5411 setting it while calling it. Set *SET to a SET rtx if the register
5412 is set by INSN. */
5413 static bool
5414 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5416 rtx pattern, reg2;
5418 *set = NULL_RTX;
5420 reg2 = sfunc_uses_reg (insn);
5421 if (reg2 && REGNO (reg2) == REGNO (reg))
5423 pattern = single_set (insn);
5424 if (pattern
5425 && REG_P (SET_DEST (pattern))
5426 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5427 *set = pattern;
5428 return false;
5430 if (!CALL_P (insn))
5432 /* We don't use rtx_equal_p because we don't care if the mode is
5433 different. */
5434 pattern = single_set (insn);
5435 if (pattern
5436 && REG_P (SET_DEST (pattern))
5437 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5439 rtx par, part;
5440 int i;
5442 *set = pattern;
5443 par = PATTERN (insn);
5444 if (GET_CODE (par) == PARALLEL)
5445 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5447 part = XVECEXP (par, 0, i);
5448 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5449 return true;
5451 return reg_mentioned_p (reg, SET_SRC (pattern));
5454 return true;
5457 pattern = PATTERN (insn);
5459 if (GET_CODE (pattern) == PARALLEL)
5461 int i;
5463 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5464 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5465 return true;
5466 pattern = XVECEXP (pattern, 0, 0);
5469 if (GET_CODE (pattern) == SET)
5471 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5473 /* We don't use rtx_equal_p, because we don't care if the
5474 mode is different. */
5475 if (!REG_P (SET_DEST (pattern))
5476 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5477 return true;
5479 *set = pattern;
5482 pattern = SET_SRC (pattern);
5485 if (GET_CODE (pattern) != CALL
5486 || !MEM_P (XEXP (pattern, 0))
5487 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5488 return true;
5490 return false;
5493 /* Given X, a pattern of an insn or a part of it, return a mask of used
5494 general registers. Bits 0..15 mean that the respective registers
5495 are used as inputs in the instruction. Bits 16..31 mean that the
5496 registers 0..15, respectively, are used as outputs, or are clobbered.
5497 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5499 regs_used (rtx x, int is_dest)
5501 enum rtx_code code;
5502 const char *fmt;
5503 int i, used = 0;
5505 if (! x)
5506 return used;
5507 code = GET_CODE (x);
5508 switch (code)
5510 case REG:
5511 if (REGNO (x) < 16)
5512 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5513 << (REGNO (x) + is_dest));
5514 return 0;
5515 case SUBREG:
5517 rtx y = SUBREG_REG (x);
5519 if (!REG_P (y))
5520 break;
5521 if (REGNO (y) < 16)
5522 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5523 << (REGNO (y) +
5524 subreg_regno_offset (REGNO (y),
5525 GET_MODE (y),
5526 SUBREG_BYTE (x),
5527 GET_MODE (x)) + is_dest));
5528 return 0;
5530 case SET:
5531 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5532 case RETURN:
5533 /* If there was a return value, it must have been indicated with USE. */
5534 return 0x00ffff00;
5535 case CLOBBER:
5536 is_dest = 1;
5537 break;
5538 case MEM:
5539 is_dest = 0;
5540 break;
5541 case CALL:
5542 used |= 0x00ff00f0;
5543 break;
5544 default:
5545 break;
5548 fmt = GET_RTX_FORMAT (code);
5550 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5552 if (fmt[i] == 'E')
5554 int j;
5555 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5556 used |= regs_used (XVECEXP (x, i, j), is_dest);
5558 else if (fmt[i] == 'e')
5559 used |= regs_used (XEXP (x, i), is_dest);
5561 return used;
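/* Illustrative example (not part of the original source): for the pattern
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))), and assuming SImode
   occupies a single hard register, regs_used returns
   0x4 | 0x8 | (1 << (1 + 16)) == 0x2000c: r2 and r3 used as inputs, r1
   written as an output.  */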
5564 /* Create an instruction that prevents redirection of a conditional branch
5565 to the destination of the JUMP with address ADDR.
5566 If the branch needs to be implemented as an indirect jump, try to find
5567 a scratch register for it.
5568 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5569 If any preceding insn that doesn't fit into a delay slot is good enough,
5570 pass 1. Pass 2 if a definite blocking insn is needed.
5571 -1 is used internally to avoid deep recursion.
5572 If a blocking instruction is made or recognized, return it. */
5573 static rtx
5574 gen_block_redirect (rtx jump, int addr, int need_block)
5576 int dead = 0;
5577 rtx prev = prev_nonnote_insn (jump);
5578 rtx dest;
5580 /* First, check if we already have an instruction that satisfies our need. */
5581 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5583 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5584 return prev;
5585 if (GET_CODE (PATTERN (prev)) == USE
5586 || GET_CODE (PATTERN (prev)) == CLOBBER
5587 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5588 prev = jump;
5589 else if ((need_block &= ~1) < 0)
5590 return prev;
5591 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5592 need_block = 0;
5594 if (GET_CODE (PATTERN (jump)) == RETURN)
5596 if (! need_block)
5597 return prev;
5598 /* Reorg even does nasty things with return insns that cause branches
5599 to go out of range - see find_end_label and callers. */
5600 return emit_insn_before (gen_block_branch_redirect (const0_rtx), jump);
5602 /* We can't use JUMP_LABEL here because it might be undefined
5603 when not optimizing. */
5604 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5605 /* If the branch is out of range, try to find a scratch register for it. */
5606 if (optimize
5607 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5608 > 4092 + 4098))
5610 rtx scan;
5611 /* Don't look for the stack pointer as a scratch register,
5612 it would cause trouble if an interrupt occurred. */
5613 unsigned attempt = 0x7fff, used;
5614 int jump_left = flag_expensive_optimizations + 1;
5616 /* It is likely that the most recent eligible instruction is wanted for
5617 the delay slot. Therefore, find out which registers it uses, and
5618 try to avoid using them. */
5620 for (scan = jump; (scan = PREV_INSN (scan)); )
5622 enum rtx_code code;
5624 if (INSN_DELETED_P (scan))
5625 continue;
5626 code = GET_CODE (scan);
5627 if (code == CODE_LABEL || code == JUMP_INSN)
5628 break;
5629 if (code == INSN
5630 && GET_CODE (PATTERN (scan)) != USE
5631 && GET_CODE (PATTERN (scan)) != CLOBBER
5632 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5634 attempt &= ~regs_used (PATTERN (scan), 0);
5635 break;
5638 for (used = dead = 0, scan = JUMP_LABEL (jump);
5639 (scan = NEXT_INSN (scan)); )
5641 enum rtx_code code;
5643 if (INSN_DELETED_P (scan))
5644 continue;
5645 code = GET_CODE (scan);
5646 if (INSN_P (scan))
5648 used |= regs_used (PATTERN (scan), 0);
5649 if (code == CALL_INSN)
5650 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5651 dead |= (used >> 16) & ~used;
5652 if (dead & attempt)
5654 dead &= attempt;
5655 break;
5657 if (code == JUMP_INSN)
5659 if (jump_left-- && simplejump_p (scan))
5660 scan = JUMP_LABEL (scan);
5661 else
5662 break;
5666 /* Mask out the stack pointer again, in case it was
5667 the only 'free' register we have found. */
5668 dead &= 0x7fff;
5670 /* If the immediate destination is still in range, check for possible
5671 threading with a jump beyond the delay slot insn.
5672 Don't check if we are called recursively; the jump has been or will be
5673 checked in a different invocation in that case. */
5675 else if (optimize && need_block >= 0)
5677 rtx next = next_active_insn (next_active_insn (dest));
5678 if (next && JUMP_P (next)
5679 && GET_CODE (PATTERN (next)) == SET
5680 && recog_memoized (next) == CODE_FOR_jump_compact)
5682 dest = JUMP_LABEL (next);
5683 if (dest
5684 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5685 > 4092 + 4098))
5686 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5690 if (dead)
5692 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5694 /* It would be nice if we could convert the jump into an indirect
5695 jump / far branch right now, and thus expose all constituent
5696 instructions to further optimization. However, reorg uses
5697 simplejump_p to determine if there is an unconditional jump where
5698 it should try to schedule instructions from the target of the
5699 branch; simplejump_p fails for indirect jumps even if they have
5700 a JUMP_LABEL. */
5701 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5702 (reg, GEN_INT (unspec_bbr_uid++)),
5703 jump);
5704 /* ??? We would like this to have the scope of the jump, but that
5705 scope will change when a delay slot insn of an inner scope is added.
5706 Hence, after delay slot scheduling, we'll have to expect
5707 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5708 the jump. */
5710 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5711 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5712 return insn;
5714 else if (need_block)
5715 /* We can't use JUMP_LABEL here because it might be undefined
5716 when not optimizing. */
5717 return emit_insn_before (gen_block_branch_redirect
5718 (GEN_INT (unspec_bbr_uid++)),
5719 jump);
5720 return prev;
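/* Editor's note (illustrative, not part of the original sources): the
   unsigned comparison used above, "dest_address - addr + (unsigned) 4092
   > 4092 + 4098", is the usual folded range check: with OFFSET =
   dest_address - addr it is true exactly when OFFSET < -4092 or
   OFFSET > 4098, i.e. when the displacement falls outside [-4092, 4098].
   A minimal sketch of the idiom with small made-up bounds:  */
#if 0
static int
example_outside_range (int offset)
{
  /* True iff OFFSET lies outside [-4, 6]; the negative case relies on the
     unsigned wrap-around of (offset + 4).  */
  return (unsigned int) (offset + 4) > 4 + 6;
}
#endif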
5723 #define CONDJUMP_MIN -252
5724 #define CONDJUMP_MAX 262
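/* Editor's note (illustrative, not part of the original sources): these two
   bounds are the byte displacements, relative to the branch itself, within
   which split_branches below assumes a conditional branch can still reach a
   label; e.g. a conditional branch at address A is treated as able to reach
   addresses in roughly [A - 252, A + 262].  */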
5725 struct far_branch
5727 /* A label (to be placed) in front of the jump
5728 that jumps to our ultimate destination. */
5729 rtx near_label;
5730 /* Where we are going to insert it if we cannot move the jump any farther,
5731 or the jump itself if we have picked up an existing jump. */
5732 rtx insert_place;
5733 /* The ultimate destination. */
5734 rtx far_label;
5735 struct far_branch *prev;
5736 /* If the branch has already been created, its address;
5737 else the address of its first prospective user. */
5738 int address;
5741 static void gen_far_branch (struct far_branch *);
5742 enum mdep_reorg_phase_e mdep_reorg_phase;
5743 static void
5744 gen_far_branch (struct far_branch *bp)
5746 rtx insn = bp->insert_place;
5747 rtx jump;
5748 rtx label = gen_label_rtx ();
5749 int ok;
5751 emit_label_after (label, insn);
5752 if (bp->far_label)
5754 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5755 LABEL_NUSES (bp->far_label)++;
5757 else
5758 jump = emit_jump_insn_after (gen_return (), insn);
5760 /* Emit a barrier so that reorg knows that any following instructions
5761 are not reachable via a fall-through path.
5762 But don't do this when not optimizing, since we wouldn't suppress the
5763 alignment for the barrier then, and could end up with out-of-range
5764 pc-relative loads. */
5765 if (optimize)
5766 emit_barrier_after (jump);
5767 emit_label_after (bp->near_label, insn);
5769 if (bp->far_label)
5770 JUMP_LABEL (jump) = bp->far_label;
5771 else
5773 rtx pat = PATTERN (jump);
5774 gcc_assert (ANY_RETURN_P (pat));
5775 JUMP_LABEL (jump) = pat;
5778 ok = invert_jump (insn, label, 1);
5779 gcc_assert (ok);
5781 /* If we are branching around a jump (rather than a return), prevent
5782 reorg from using an insn from the jump target as the delay slot insn -
5783 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5784 and it could cause branches to go out of range. */
5785 if (bp->far_label)
5786 (emit_insn_after
5787 (gen_stuff_delay_slot
5788 (GEN_INT (unspec_bbr_uid++),
5789 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5790 insn));
5791 /* Prevent reorg from undoing our splits. */
5792 gen_block_redirect (jump, bp->address += 2, 2);
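/* Editor's note (illustrative, not part of the original sources): for an
   out-of-range conditional branch such as "bt .Lfar", the code above roughly
   produces (labels made up, delay slots and the stuff_delay_slot filler
   omitted):

	bf	.Lskip		! original branch, condition inverted
   .Lnear:			! bp->near_label; other short branches land here
	bra	.Lfar		! or a return when far_label is NULL
   .Lskip:

   with a barrier between the bra and .Lskip when optimizing, so that reorg
   treats the code after it as unreachable by fall-through.  */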
5795 /* Fix up ADDR_DIFF_VECs. */
5796 void
5797 fixup_addr_diff_vecs (rtx first)
5799 rtx insn;
5801 for (insn = first; insn; insn = NEXT_INSN (insn))
5803 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5805 if (!JUMP_P (insn)
5806 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5807 continue;
5808 pat = PATTERN (insn);
5809 vec_lab = XEXP (XEXP (pat, 0), 0);
5811 /* Search the matching casesi_jump_2. */
5812 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5814 if (!JUMP_P (prev))
5815 continue;
5816 prevpat = PATTERN (prev);
5817 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5818 continue;
5819 x = XVECEXP (prevpat, 0, 1);
5820 if (GET_CODE (x) != USE)
5821 continue;
5822 x = XEXP (x, 0);
5823 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5824 break;
5826 /* FIXME: This is a bug in the optimizer, but it seems harmless
5827 to just avoid panicking. */
5828 if (!prev)
5829 continue;
5831 /* Emit the reference label of the braf where it belongs, right after
5832 the casesi_jump_2 (i.e. braf). */
5833 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5834 emit_label_after (braf_label, prev);
5836 /* Fix up the ADDR_DIFF_VEC to be relative
5837 to the reference address of the braf. */
5838 XEXP (XEXP (pat, 0), 0) = braf_label;
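/* Editor's note (illustrative, not part of the original sources): after this
   fix-up the dispatch sequence and its table look roughly like (labels and
   entry size made up):

	braf	r0		! casesi_jump_2
   .Lref:			! braf_label, emitted right after the braf
	...
   .Ltable:
	.word	.Lcase0 - .Lref
	.word	.Lcase1 - .Lref

   i.e. each ADDR_DIFF_VEC entry becomes an offset from the braf's reference
   point instead of from the table's own label.  */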
5842 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5843 a barrier. Return the base 2 logarithm of the desired alignment. */
5844 int
5845 barrier_align (rtx barrier_or_label)
5847 rtx next = next_real_insn (barrier_or_label), pat, prev;
5849 if (! next)
5850 return 0;
5852 pat = PATTERN (next);
5854 if (GET_CODE (pat) == ADDR_DIFF_VEC)
5855 return 2;
5857 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5858 /* This is a barrier in front of a constant table. */
5859 return 0;
5861 prev = prev_real_insn (barrier_or_label);
5862 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
5864 pat = PATTERN (prev);
5865 /* If this is a very small table, we want to keep the alignment after
5866 the table to the minimum for proper code alignment. */
5867 return ((optimize_size
5868 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5869 <= (unsigned) 1 << (CACHE_LOG - 2)))
5870 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5873 if (optimize_size)
5874 return 0;
5876 if (! TARGET_SH2 || ! optimize)
5877 return align_jumps_log;
5879 /* When fixing up pcloads, a constant table might be inserted just before
5880 the basic block that ends with the barrier. Thus, we can't trust the
5881 instruction lengths before that. */
5882 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5884 /* Check if there is an immediately preceding branch to the insn beyond
5885 the barrier. We must weight the cost of discarding useful information
5886 from the current cache line when executing this branch and there is
5887 an alignment, against that of fetching unneeded insns in front of the
5888 branch target when there is no alignment. */
5890 /* There are two delay_slot cases to consider. One is the simple case
5891 where the preceding branch is to the insn beyond the barrier (simple
5892 delay slot filling), and the other is where the preceding branch has
5893 a delay slot that is a duplicate of the insn after the barrier
5894 (fill_eager_delay_slots) and the branch is to the insn after the insn
5895 after the barrier. */
5897 /* PREV is presumed to be the JUMP_INSN for the barrier under
5898 investigation. Skip to the insn before it. */
5900 int slot, credit;
5901 bool jump_to_next = false;
5903 prev = prev_real_insn (prev);
5905 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5906 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5907 prev = prev_real_insn (prev))
5909 jump_to_next = false;
5910 if (GET_CODE (PATTERN (prev)) == USE
5911 || GET_CODE (PATTERN (prev)) == CLOBBER)
5912 continue;
5913 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5915 prev = XVECEXP (PATTERN (prev), 0, 1);
5916 if (INSN_UID (prev) == INSN_UID (next))
5918 /* Delay slot was filled with insn at jump target. */
5919 jump_to_next = true;
5920 continue;
5924 if (slot
5925 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5926 slot = 0;
5927 credit -= get_attr_length (prev);
5929 if (prev && jump_to_label_p (prev))
5931 rtx x;
5932 if (jump_to_next
5933 || next_real_insn (JUMP_LABEL (prev)) == next
5934 /* If relax_delay_slots() decides NEXT was redundant
5935 with some previous instruction, it will have
5936 redirected PREV's jump to the following insn. */
5937 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5938 /* There is no upper bound on redundant instructions
5939 that might have been skipped, but we must not put an
5940 alignment where none had been before. */
5941 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5942 (INSN_P (x)
5943 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5944 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5945 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5947 rtx pat = PATTERN (prev);
5948 if (GET_CODE (pat) == PARALLEL)
5949 pat = XVECEXP (pat, 0, 0);
5950 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5951 return 0;
5956 return align_jumps_log;
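/* Editor's note (illustrative, not part of the original sources): the "very
   small table" test above compares the table size in bytes,
   XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)), against
   1 << (CACHE_LOG - 2).  Assuming, purely for illustration, CACHE_LOG == 5
   (a 32-byte cache line), the threshold is 8 bytes: a 4-entry HImode
   ADDR_DIFF_VEC (4 * 2 = 8 bytes) keeps only the minimum alignment, while a
   16-entry one (32 bytes) gets align_jumps_log.  */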
5959 /* If we are inside a phony loop, almost any kind of label can turn up as the
5960 first one in the loop. Aligning a braf label causes incorrect switch
5961 destination addresses; we can detect braf labels because they are
5962 followed by a BARRIER.
5963 Applying loop alignment to small constant or switch tables is a waste
5964 of space, so we suppress this too. */
5965 int
5966 sh_loop_align (rtx label)
5968 rtx next = label;
5970 if (! optimize || optimize_size)
5971 return 0;
5974 next = next_nonnote_insn (next);
5975 while (next && LABEL_P (next));
5977 if (! next
5978 || ! INSN_P (next)
5979 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
5980 || recog_memoized (next) == CODE_FOR_consttable_2)
5981 return 0;
5983 return align_loops_log;
5986 /* Do a final pass over the function, just before delayed branch
5987 scheduling. */
5988 static void
5989 sh_reorg (void)
5991 rtx first, insn, mova = NULL_RTX;
5992 int num_mova;
5993 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5994 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5996 first = get_insns ();
5997 max_labelno_before_reorg = max_label_num ();
5999 /* We must split call insns before introducing `mova's. If we're
6000 optimizing, they'll have already been split. Otherwise, make
6001 sure we don't split them too late. */
6002 if (! optimize)
6003 split_all_insns_noflow ();
6005 if (TARGET_SHMEDIA)
6006 return;
6008 /* If relaxing, generate pseudo-ops to associate function calls with
6009 the symbols they call. It does no harm to not generate these
6010 pseudo-ops. However, when we can generate them, it enables the
6011 linker to potentially relax the jsr to a bsr, and eliminate the
6012 register load and, possibly, the constant pool entry. */
6014 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6015 if (TARGET_RELAX)
6017 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6018 own purposes. This works because none of the remaining passes
6019 need to look at them.
6021 ??? But it may break in the future. We should use a machine
6022 dependent REG_NOTE, or some other approach entirely. */
6023 for (insn = first; insn; insn = NEXT_INSN (insn))
6025 if (INSN_P (insn))
6027 rtx note;
6029 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6030 NULL_RTX)) != 0)
6031 remove_note (insn, note);
6035 for (insn = first; insn; insn = NEXT_INSN (insn))
6037 rtx pattern, reg, link, set, scan, dies, label;
6038 int rescan = 0, foundinsn = 0;
6040 if (CALL_P (insn))
6042 pattern = PATTERN (insn);
6044 if (GET_CODE (pattern) == PARALLEL)
6045 pattern = XVECEXP (pattern, 0, 0);
6046 if (GET_CODE (pattern) == SET)
6047 pattern = SET_SRC (pattern);
6049 if (GET_CODE (pattern) != CALL
6050 || !MEM_P (XEXP (pattern, 0)))
6051 continue;
6053 reg = XEXP (XEXP (pattern, 0), 0);
6055 else
6057 reg = sfunc_uses_reg (insn);
6058 if (! reg)
6059 continue;
6062 if (!REG_P (reg))
6063 continue;
6065 /* Try scanning backward to find where the register is set. */
6066 link = NULL;
6067 for (scan = PREV_INSN (insn);
6068 scan && !LABEL_P (scan);
6069 scan = PREV_INSN (scan))
6071 if (! INSN_P (scan))
6072 continue;
6074 if (! reg_mentioned_p (reg, scan))
6075 continue;
6077 if (noncall_uses_reg (reg, scan, &set))
6078 break;
6080 if (set)
6082 link = scan;
6083 break;
6087 if (! link)
6088 continue;
6090 /* The register is set at LINK. */
6092 /* We can only optimize the function call if the register is
6093 being set to a symbol. In theory, we could sometimes
6094 optimize calls to a constant location, but the assembler
6095 and linker do not support that at present. */
6096 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6097 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6098 continue;
6100 /* Scan forward from LINK to the place where REG dies, and
6101 make sure that the only insns which use REG are
6102 themselves function calls. */
6104 /* ??? This doesn't work for call targets that were allocated
6105 by reload, since there may not be a REG_DEAD note for the
6106 register. */
6108 dies = NULL_RTX;
6109 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6111 rtx scanset;
6113 /* Don't try to trace forward past a CODE_LABEL if we haven't
6114 seen INSN yet. Ordinarily, we will only find the setting insn
6115 if it is in the same basic block. However,
6116 cross-jumping can insert code labels in between the load and
6117 the call, and can result in situations where a single call
6118 insn may have two targets depending on where we came from. */
6120 if (LABEL_P (scan) && ! foundinsn)
6121 break;
6123 if (! INSN_P (scan))
6124 continue;
6126 /* Don't try to trace forward past a JUMP. To optimize
6127 safely, we would have to check that all the
6128 instructions at the jump destination did not use REG. */
6130 if (JUMP_P (scan))
6131 break;
6133 if (! reg_mentioned_p (reg, scan))
6134 continue;
6136 if (noncall_uses_reg (reg, scan, &scanset))
6137 break;
6139 if (scan == insn)
6140 foundinsn = 1;
6142 if (scan != insn
6143 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6145 /* There is a function call to this register other
6146 than the one we are checking. If we optimize
6147 this call, we need to rescan again below. */
6148 rescan = 1;
6151 /* ??? We shouldn't have to worry about SCANSET here.
6152 We should just be able to check for a REG_DEAD note
6153 on a function call. However, the REG_DEAD notes are
6154 apparently not dependable around libcalls; c-torture
6155 execute/920501-2 is a test case. If SCANSET is set,
6156 then this insn sets the register, so it must have
6157 died earlier. Unfortunately, this will only handle
6158 the cases in which the register is, in fact, set in a
6159 later insn. */
6161 /* ??? We shouldn't have to use FOUNDINSN here.
6162 This dates back to when we used LOG_LINKS to find
6163 the most recent insn which sets the register. */
6165 if (foundinsn
6166 && (scanset
6167 || find_reg_note (scan, REG_DEAD, reg)))
6169 dies = scan;
6170 break;
6174 if (! dies)
6176 /* Either there was a branch, or some insn used REG
6177 other than as a function call address. */
6178 continue;
6181 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6182 on the insn which sets the register, and on each call insn
6183 which uses the register. In final_prescan_insn we look for
6184 the REG_LABEL_OPERAND notes, and output the appropriate label
6185 or pseudo-op. */
6187 label = gen_label_rtx ();
6188 add_reg_note (link, REG_LABEL_OPERAND, label);
6189 add_reg_note (insn, REG_LABEL_OPERAND, label);
6190 if (rescan)
6192 scan = link;
6195 rtx reg2;
6197 scan = NEXT_INSN (scan);
6198 if (scan != insn
6199 && ((CALL_P (scan)
6200 && reg_mentioned_p (reg, scan))
6201 || ((reg2 = sfunc_uses_reg (scan))
6202 && REGNO (reg2) == REGNO (reg))))
6203 add_reg_note (scan, REG_LABEL_OPERAND, label);
6205 while (scan != dies);
6210 if (TARGET_SH2)
6211 fixup_addr_diff_vecs (first);
6213 if (optimize)
6215 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6216 shorten_branches (first);
6219 /* Scan the function looking for move instructions which have to be
6220 changed to pc-relative loads and insert the literal tables. */
6221 label_ref_list_pool = create_alloc_pool ("label references list",
6222 sizeof (struct label_ref_list_d),
6223 30);
6224 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6225 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6227 if (mova_p (insn))
6229 /* ??? basic block reordering can move a switch table dispatch
6230 below the switch table. Check if that has happened.
6231 We only have the addresses available when optimizing; but then,
6232 this check shouldn't be needed when not optimizing. */
6233 if (!untangle_mova (&num_mova, &mova, insn))
6235 insn = mova;
6236 num_mova = 0;
6239 else if (JUMP_P (insn)
6240 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6241 && num_mova
6242 /* ??? loop invariant motion can also move a mova out of a
6243 loop. Since loop does this code motion anyway, maybe we
6244 should wrap UNSPEC_MOVA into a CONST, so that reload can
6245 move it back. */
6246 && ((num_mova > 1
6247 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6248 || (prev_nonnote_insn (insn)
6249 == XEXP (MOVA_LABELREF (mova), 0))))
6251 rtx scan;
6252 int total;
6254 num_mova--;
6256 /* Some code might have been inserted between the mova and
6257 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6258 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6259 total += get_attr_length (scan);
6261 /* The range of mova is 1020; add 4 because pc counts from the address of
6262 the second instruction after this one, subtract 2 in case pc is 2
6263 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6264 cancels out with alignment effects of the mova itself. */
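/* Editor's note (illustrative, not part of the original sources): the 1022
   below is just the arithmetic spelled out above:
   1020 (mova reach) + 4 (pc bias) - 2 (possible 2-byte misalignment) = 1022,
   so TOTAL > 1022 means the ADDR_DIFF_VEC may no longer be addressable.  */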
6265 if (total > 1022)
6267 /* Change the mova into a load, and restart scanning
6268 there. broken_move will then return true for mova. */
6269 fixup_mova (mova);
6270 insn = mova;
6273 if (broken_move (insn)
6274 || (NONJUMP_INSN_P (insn)
6275 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6277 rtx scan;
6278 /* Scan ahead looking for a barrier to stick the constant table
6279 behind. */
6280 rtx barrier = find_barrier (num_mova, mova, insn);
6281 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6282 int need_aligned_label = 0;
6284 if (num_mova && ! mova_p (mova))
6286 /* find_barrier had to change the first mova into a
6287 pcload; thus, we have to start with this new pcload. */
6288 insn = mova;
6289 num_mova = 0;
6291 /* Now find all the moves between the points and modify them. */
6292 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6294 if (LABEL_P (scan))
6295 last_float = 0;
6296 if (NONJUMP_INSN_P (scan)
6297 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6298 need_aligned_label = 1;
6299 if (broken_move (scan))
6301 rtx *patp = &PATTERN (scan), pat = *patp;
6302 rtx src, dst;
6303 rtx lab;
6304 rtx newsrc;
6305 enum machine_mode mode;
6307 if (GET_CODE (pat) == PARALLEL)
6308 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6309 src = SET_SRC (pat);
6310 dst = SET_DEST (pat);
6311 mode = GET_MODE (dst);
6313 if (mode == SImode && satisfies_constraint_I16 (src)
6314 && REGNO (dst) != FPUL_REG)
6316 int offset = 0;
6318 mode = HImode;
6319 while (GET_CODE (dst) == SUBREG)
6321 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6322 GET_MODE (SUBREG_REG (dst)),
6323 SUBREG_BYTE (dst),
6324 GET_MODE (dst));
6325 dst = SUBREG_REG (dst);
6327 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6329 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6331 /* This must be an insn that clobbers r0. */
6332 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6333 XVECLEN (PATTERN (scan), 0)
6334 - 1);
6335 rtx clobber = *clobberp;
6337 gcc_assert (GET_CODE (clobber) == CLOBBER
6338 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6340 if (last_float
6341 && reg_set_between_p (r0_rtx, last_float_move, scan))
6342 last_float = 0;
6343 if (last_float
6344 && TARGET_SHCOMPACT
6345 && GET_MODE_SIZE (mode) != 4
6346 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6347 last_float = 0;
6348 lab = add_constant (src, mode, last_float);
6349 if (lab)
6350 emit_insn_before (gen_mova (lab), scan);
6351 else
6353 /* There will be a REG_UNUSED note for r0 on
6354 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6355 otherwise reorg:mark_target_live_regs will not
6356 consider r0 to be used, and we could end up with a delay
6357 slot insn in front of SCAN that clobbers r0. */
6358 rtx note
6359 = find_regno_note (last_float_move, REG_UNUSED, 0);
6361 /* If we are not optimizing, then there may not be
6362 a note. */
6363 if (note)
6364 PUT_REG_NOTE_KIND (note, REG_INC);
6366 *last_float_addr = r0_inc_rtx;
6368 last_float_move = scan;
6369 last_float = src;
6370 newsrc = gen_const_mem (mode,
6371 (((TARGET_SH4 && ! TARGET_FMOVD)
6372 || REGNO (dst) == FPUL_REG)
6373 ? r0_inc_rtx
6374 : r0_rtx));
6375 last_float_addr = &XEXP (newsrc, 0);
6377 /* Remove the clobber of r0. */
6378 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6379 gen_rtx_SCRATCH (Pmode));
6381 /* This is a mova needing a label. Create it. */
6382 else if (GET_CODE (src) == UNSPEC
6383 && XINT (src, 1) == UNSPEC_MOVA
6384 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6386 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6387 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6388 newsrc = gen_rtx_UNSPEC (SImode,
6389 gen_rtvec (1, newsrc),
6390 UNSPEC_MOVA);
6392 else if (GET_CODE (src) == UNSPEC_VOLATILE
6393 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6395 newsrc = XVECEXP (src, 0, 0);
6396 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6397 INSN_CODE (scan) = -1;
6398 continue;
6400 else
6402 lab = add_constant (src, mode, 0);
6403 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6404 newsrc = gen_const_mem (mode, newsrc);
6406 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6407 INSN_CODE (scan) = -1;
6410 dump_table (need_aligned_label ? insn : 0, barrier);
6411 insn = barrier;
6414 free_alloc_pool (label_ref_list_pool);
6415 for (insn = first; insn; insn = NEXT_INSN (insn))
6416 PUT_MODE (insn, VOIDmode);
6418 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6419 INSN_ADDRESSES_FREE ();
6420 split_branches (first);
6422 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6423 also has an effect on the register that holds the address of the sfunc.
6424 Insert an extra dummy insn in front of each sfunc that pretends to
6425 use this register. */
6426 if (flag_delayed_branch)
6428 for (insn = first; insn; insn = NEXT_INSN (insn))
6430 rtx reg = sfunc_uses_reg (insn);
6432 if (! reg)
6433 continue;
6434 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6437 #if 0
6438 /* fpscr is not actually a user variable, but we pretend it is for the
6439 sake of the previous optimization passes, since we want it handled like
6440 one. However, we don't have any debugging information for it, so turn
6441 it into a non-user variable now. */
6442 if (TARGET_SH4)
6443 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6444 #endif
6445 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6448 /* Return the UID of the insn that follows the specified label. */
6449 static int
6450 get_dest_uid (rtx label, int max_uid)
6452 rtx dest = next_real_insn (label);
6453 int dest_uid;
6454 if (! dest)
6455 /* This can happen for an undefined label. */
6456 return 0;
6457 dest_uid = INSN_UID (dest);
6458 /* If this is a newly created branch redirection blocking instruction,
6459 we cannot index the branch_uid or insn_addresses arrays with its
6460 uid. But then, we won't need to, because the actual destination is
6461 the following branch. */
6462 while (dest_uid >= max_uid)
6464 dest = NEXT_INSN (dest);
6465 dest_uid = INSN_UID (dest);
6467 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6468 return 0;
6469 return dest_uid;
6472 /* Split condbranches that are out of range. Also add clobbers for
6473 scratch registers that are needed in far jumps.
6474 We do this before delay slot scheduling, so that it can take our
6475 newly created instructions into account. It also allows us to
6476 find branches with common targets more easily. */
6477 static void
6478 split_branches (rtx first)
6480 rtx insn;
6481 struct far_branch **uid_branch, *far_branch_list = 0;
6482 int max_uid = get_max_uid ();
6483 int ok;
6485 /* Find out which branches are out of range. */
6486 shorten_branches (first);
6488 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6489 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6491 for (insn = first; insn; insn = NEXT_INSN (insn))
6492 if (! INSN_P (insn))
6493 continue;
6494 else if (INSN_DELETED_P (insn))
6496 /* Shorten_branches would split this instruction again,
6497 so transform it into a note. */
6498 SET_INSN_DELETED (insn);
6500 else if (JUMP_P (insn)
6501 /* Don't mess with ADDR_DIFF_VEC */
6502 && (GET_CODE (PATTERN (insn)) == SET
6503 || GET_CODE (PATTERN (insn)) == RETURN))
6505 enum attr_type type = get_attr_type (insn);
6506 if (type == TYPE_CBRANCH)
6508 rtx next, beyond;
6510 if (get_attr_length (insn) > 4)
6512 rtx src = SET_SRC (PATTERN (insn));
6513 rtx olabel = XEXP (XEXP (src, 1), 0);
6514 int addr = INSN_ADDRESSES (INSN_UID (insn));
6515 rtx label = 0;
6516 int dest_uid = get_dest_uid (olabel, max_uid);
6517 struct far_branch *bp = uid_branch[dest_uid];
6519 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6520 the label if the LABEL_NUSES count drops to zero. There is
6521 always a jump_optimize pass that sets these values, but it
6522 proceeds to delete unreferenced code, and then if not
6523 optimizing, to un-delete the deleted instructions, thus
6524 leaving labels with too low uses counts. */
6525 if (! optimize)
6527 JUMP_LABEL (insn) = olabel;
6528 LABEL_NUSES (olabel)++;
6530 if (! bp)
6532 bp = (struct far_branch *) alloca (sizeof *bp);
6533 uid_branch[dest_uid] = bp;
6534 bp->prev = far_branch_list;
6535 far_branch_list = bp;
6536 bp->far_label
6537 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6538 LABEL_NUSES (bp->far_label)++;
6540 else
6542 label = bp->near_label;
6543 if (! label && bp->address - addr >= CONDJUMP_MIN)
6545 rtx block = bp->insert_place;
6547 if (GET_CODE (PATTERN (block)) == RETURN)
6548 block = PREV_INSN (block);
6549 else
6550 block = gen_block_redirect (block,
6551 bp->address, 2);
6552 label = emit_label_after (gen_label_rtx (),
6553 PREV_INSN (block));
6554 bp->near_label = label;
6556 else if (label && ! NEXT_INSN (label))
6558 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6559 bp->insert_place = insn;
6560 else
6561 gen_far_branch (bp);
6564 if (! label
6565 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6567 bp->near_label = label = gen_label_rtx ();
6568 bp->insert_place = insn;
6569 bp->address = addr;
6571 ok = redirect_jump (insn, label, 0);
6572 gcc_assert (ok);
6574 else
6576 /* get_attr_length (insn) == 2 */
6577 /* Check if we have a pattern where reorg wants to redirect
6578 the branch to a label from an unconditional branch that
6579 is too far away. */
6580 /* We can't use JUMP_LABEL here because it might be undefined
6581 when not optimizing. */
6582 /* A syntax error might cause beyond to be NULL_RTX. */
6583 beyond
6584 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6585 0));
6587 if (beyond
6588 && (JUMP_P (beyond)
6589 || ((beyond = next_active_insn (beyond))
6590 && JUMP_P (beyond)))
6591 && GET_CODE (PATTERN (beyond)) == SET
6592 && recog_memoized (beyond) == CODE_FOR_jump_compact
6593 && ((INSN_ADDRESSES
6594 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6595 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6596 > 252 + 258 + 2))
6597 gen_block_redirect (beyond,
6598 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6601 next = next_active_insn (insn);
6603 if (next
6604 && (JUMP_P (next)
6605 || ((next = next_active_insn (next))
6606 && JUMP_P (next)))
6607 && GET_CODE (PATTERN (next)) == SET
6608 && recog_memoized (next) == CODE_FOR_jump_compact
6609 && ((INSN_ADDRESSES
6610 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6611 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6612 > 252 + 258 + 2))
6613 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6615 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6617 int addr = INSN_ADDRESSES (INSN_UID (insn));
6618 rtx far_label = 0;
6619 int dest_uid = 0;
6620 struct far_branch *bp;
6622 if (type == TYPE_JUMP)
6624 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6625 dest_uid = get_dest_uid (far_label, max_uid);
6626 if (! dest_uid)
6628 /* Parse errors can lead to labels outside
6629 the insn stream. */
6630 if (! NEXT_INSN (far_label))
6631 continue;
6633 if (! optimize)
6635 JUMP_LABEL (insn) = far_label;
6636 LABEL_NUSES (far_label)++;
6638 redirect_jump (insn, ret_rtx, 1);
6639 far_label = 0;
6642 bp = uid_branch[dest_uid];
6643 if (! bp)
6645 bp = (struct far_branch *) alloca (sizeof *bp);
6646 uid_branch[dest_uid] = bp;
6647 bp->prev = far_branch_list;
6648 far_branch_list = bp;
6649 bp->near_label = 0;
6650 bp->far_label = far_label;
6651 if (far_label)
6652 LABEL_NUSES (far_label)++;
6654 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6655 if (addr - bp->address <= CONDJUMP_MAX)
6656 emit_label_after (bp->near_label, PREV_INSN (insn));
6657 else
6659 gen_far_branch (bp);
6660 bp->near_label = 0;
6662 else
6663 bp->near_label = 0;
6664 bp->address = addr;
6665 bp->insert_place = insn;
6666 if (! far_label)
6667 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6668 else
6669 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6672 /* Generate all pending far branches,
6673 and free our references to the far labels. */
6674 while (far_branch_list)
6676 if (far_branch_list->near_label
6677 && ! NEXT_INSN (far_branch_list->near_label))
6678 gen_far_branch (far_branch_list);
6679 if (optimize
6680 && far_branch_list->far_label
6681 && ! --LABEL_NUSES (far_branch_list->far_label))
6682 delete_insn (far_branch_list->far_label);
6683 far_branch_list = far_branch_list->prev;
6686 /* Instruction length information is no longer valid due to the new
6687 instructions that have been generated. */
6688 init_insn_lengths ();
6691 /* Dump out instruction addresses, which is useful for debugging the
6692 constant pool table stuff.
6694 If relaxing, output the label and pseudo-ops used to link together
6695 calls and the instruction which set the registers.
6697 ??? The addresses printed by this routine for insns are nonsense for
6698 insns which are inside of a sequence where none of the inner insns have
6699 variable length. This is because the second pass of shorten_branches
6700 does not bother to update them. */
6701 void
6702 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6703 int noperands ATTRIBUTE_UNUSED)
6705 if (TARGET_DUMPISIZE)
6706 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6708 if (TARGET_RELAX)
6710 rtx note;
6712 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6713 if (note)
6715 rtx pattern;
6717 pattern = PATTERN (insn);
6718 if (GET_CODE (pattern) == PARALLEL)
6719 pattern = XVECEXP (pattern, 0, 0);
6720 switch (GET_CODE (pattern))
6722 case SET:
6723 if (GET_CODE (SET_SRC (pattern)) != CALL
6724 && get_attr_type (insn) != TYPE_SFUNC)
6726 targetm.asm_out.internal_label
6727 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6728 break;
6730 /* else FALLTHROUGH */
6731 case CALL:
6732 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6733 CODE_LABEL_NUMBER (XEXP (note, 0)));
6734 break;
6736 default:
6737 gcc_unreachable ();
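/* Editor's note (illustrative, not part of the original sources): with
   -mrelax the code above ties a call to the insn that loaded its target.
   For a register-load insn tagged with, say, code label number 42, the
   output looks roughly like (instructions and label number made up, local
   label prefix assumed to be "."):

   .L42:				! emitted at the insn that loads the target
	mov.l	.Lsym,r1
	...
	.uses	.L42			! emitted just before the call
	jsr	@r1

   which is what lets the linker relax the jsr into a bsr and drop the
   register load, as described at the top of sh_reorg.  */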
6743 /* Dump out any constants accumulated in the final pass. These will
6744 only be labels. */
6745 const char *
6746 output_jump_label_table (void)
6748 int i;
6750 if (pool_size)
6752 fprintf (asm_out_file, "\t.align 2\n");
6753 for (i = 0; i < pool_size; i++)
6755 pool_node *p = &pool_vector[i];
6757 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6758 CODE_LABEL_NUMBER (p->label));
6759 output_asm_insn (".long %O0", &p->value);
6761 pool_size = 0;
6764 return "";
6767 /* A full frame looks like:
6769 arg-5
6770 arg-4
6771 [ if current_function_anonymous_args
6772 arg-3
6773 arg-2
6774 arg-1
6775 arg-0 ]
6776 saved-fp
6777 saved-r10
6778 saved-r11
6779 saved-r12
6780 saved-pr
6781 local-n
6783 local-1
6784 local-0 <- fp points here.
6786 Number of bytes pushed for anonymous args, used to pass information
6787 between expand_prologue and expand_epilogue.
6789 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6790 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6791 for an epilogue and a negative value means that it's for a sibcall
6792 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6793 all the registers that are about to be restored, and hence dead. */
6794 static void
6795 output_stack_adjust (int size, rtx reg, int epilogue_p,
6796 HARD_REG_SET *live_regs_mask, bool frame_p)
6798 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6799 if (size)
6801 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6803 /* This test is bogus, as output_stack_adjust is used to re-align the
6804 stack. */
6805 #if 0
6806 gcc_assert (!(size % align));
6807 #endif
6809 if (CONST_OK_FOR_ADD (size))
6810 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6811 /* Try to do it with two partial adjustments; however, we must make
6812 sure that the stack is properly aligned at all times, in case
6813 an interrupt occurs between the two partial adjustments. */
6814 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6815 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6817 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6818 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6820 else
6822 rtx const_reg;
6823 rtx insn;
6824 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6825 int i;
6827 /* If TEMP is invalid, we could temporarily save a general
6828 register to MACL. However, there is currently no need
6829 to handle this case, so just die when we see it. */
6830 if (epilogue_p < 0
6831 || current_function_interrupt
6832 || ! call_really_used_regs[temp] || fixed_regs[temp])
6833 temp = -1;
6834 if (temp < 0 && ! current_function_interrupt
6835 && (TARGET_SHMEDIA || epilogue_p >= 0))
6837 HARD_REG_SET temps;
6838 COPY_HARD_REG_SET (temps, call_used_reg_set);
6839 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6840 if (epilogue_p > 0)
6842 int nreg = 0;
6843 if (crtl->return_rtx)
6845 enum machine_mode mode;
6846 mode = GET_MODE (crtl->return_rtx);
6847 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6848 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6850 for (i = 0; i < nreg; i++)
6851 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6852 if (crtl->calls_eh_return)
6854 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6855 for (i = 0; i <= 3; i++)
6856 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6859 if (TARGET_SHMEDIA && epilogue_p < 0)
6860 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6861 CLEAR_HARD_REG_BIT (temps, i);
6862 if (epilogue_p <= 0)
6864 for (i = FIRST_PARM_REG;
6865 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6866 CLEAR_HARD_REG_BIT (temps, i);
6867 if (cfun->static_chain_decl != NULL)
6868 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6870 temp = scavenge_reg (&temps);
6872 if (temp < 0 && live_regs_mask)
6874 HARD_REG_SET temps;
6876 COPY_HARD_REG_SET (temps, *live_regs_mask);
6877 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6878 temp = scavenge_reg (&temps);
6880 if (temp < 0)
6882 rtx adj_reg, tmp_reg, mem;
6884 /* If we reached here, the most likely case is the (sibcall)
6885 epilogue for non-SHmedia. Put a special push/pop sequence
6886 for such a case as the last resort. This looks lengthy, but it
6887 would not be a problem because it seems to be very
6888 rare. */
6890 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6893 /* ??? There is still the slight possibility that r4 or
6894 r5 have been reserved as fixed registers or assigned
6895 as global registers, and they change during an
6896 interrupt. There are possible ways to handle this:
6898 - If we are adjusting the frame pointer (r14), we can do
6899 with a single temp register and an ordinary push / pop
6900 on the stack.
6901 - Grab any call-used or call-saved registers (i.e. not
6902 fixed or globals) for the temps we need. We might
6903 also grab r14 if we are adjusting the stack pointer.
6904 If we can't find enough available registers, issue
6905 a diagnostic and die - the user must have reserved
6906 way too many registers.
6907 But since all this is rather unlikely to happen and
6908 would require extra testing, we just die if r4 / r5
6909 are not available. */
6910 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6911 && !global_regs[4] && !global_regs[5]);
6913 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6914 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6915 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6916 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6917 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6918 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6919 emit_move_insn (mem, tmp_reg);
6920 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6921 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6922 emit_move_insn (mem, tmp_reg);
6923 emit_move_insn (reg, adj_reg);
6924 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6925 emit_move_insn (adj_reg, mem);
6926 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6927 emit_move_insn (tmp_reg, mem);
6928 /* Tell flow the insns that pop r4/r5 aren't dead. */
6929 emit_use (tmp_reg);
6930 emit_use (adj_reg);
6931 return;
6933 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6935 /* If SIZE is negative, subtract the positive value.
6936 This sometimes allows a constant pool entry to be shared
6937 between prologue and epilogue code. */
6938 if (size < 0)
6940 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6941 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6943 else
6945 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6946 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6948 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6949 gen_rtx_SET (VOIDmode, reg,
6950 gen_rtx_PLUS (SImode, reg,
6951 GEN_INT (size))));
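/* Editor's illustrative sketch (not part of the original sources): the
   "two partial adjustments" path above splits SIZE so that each part is a
   valid add immediate while every intermediate stack pointer value stays
   aligned.  Assuming, for illustration only, an 8-bit signed add immediate
   (roughly CONST_OK_FOR_I08, -128..127) and ALIGN == 4, SIZE == 200 does not
   fit in one add but splits as follows:  */
#if 0
static void
example_split_adjustment (void)
{
  int size = 200, align = 4;
  int first = size / 2 & -align;	/* 100, still a multiple of ALIGN.  */
  int second = size - first;		/* 100, also a valid immediate.  */
  /* The real code emits two adds, of FIRST and then SECOND, instead of a
     single add of SIZE.  */
  (void) first;
  (void) second;
}
#endif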
6956 /* Emit the specified insn and mark it as frame related.
6957 FIXME: Rename this to emit_frame_insn. */
6958 static rtx
6959 frame_insn (rtx x)
6961 x = emit_insn (x);
6962 RTX_FRAME_RELATED_P (x) = 1;
6963 return x;
6966 /* Output RTL to push register RN onto the stack. */
6967 static rtx
6968 push (int rn)
6970 rtx x;
6971 if (rn == FPUL_REG)
6972 x = gen_push_fpul ();
6973 else if (rn == FPSCR_REG)
6974 x = gen_push_fpscr ();
6975 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6976 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6978 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6979 return NULL_RTX;
6980 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6982 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6983 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6984 else
6985 x = gen_push (gen_rtx_REG (SImode, rn));
6987 x = frame_insn (x);
6988 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6989 return x;
6992 /* Output RTL to pop register RN from the stack. */
6993 static void
6994 pop (int rn)
6996 rtx x, sp_reg, reg;
6997 if (rn == FPUL_REG)
6998 x = gen_pop_fpul ();
6999 else if (rn == FPSCR_REG)
7000 x = gen_pop_fpscr ();
7001 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7002 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7004 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7005 return;
7006 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7008 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7009 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7010 else
7011 x = gen_pop (gen_rtx_REG (SImode, rn));
7013 x = emit_insn (x);
7015 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7016 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7017 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7018 : SET_DEST (PATTERN (x)));
7019 add_reg_note (x, REG_CFA_RESTORE, reg);
7020 add_reg_note (x, REG_CFA_ADJUST_CFA,
7021 gen_rtx_SET (SImode, sp_reg,
7022 plus_constant (SImode, sp_reg,
7023 GET_MODE_SIZE (GET_MODE (reg)))));
7024 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7025 RTX_FRAME_RELATED_P (x) = 1;
7028 /* Generate code to push the regs specified in the mask. */
7029 static void
7030 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7032 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7033 int skip_fpscr = 0;
7035 /* Push PR last; this gives better latencies after the prologue, and
7036 candidates for the return delay slot when there are no general
7037 registers pushed. */
7038 for (; i < FIRST_PSEUDO_REGISTER; i++)
7040 /* If this is an interrupt handler, and the SZ bit varies,
7041 and we have to push any floating point register, we need
7042 to switch to the correct precision first. */
7043 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7044 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7046 HARD_REG_SET unsaved;
7048 push (FPSCR_REG);
7049 COMPL_HARD_REG_SET (unsaved, *mask);
7050 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7051 skip_fpscr = 1;
7053 if (i != PR_REG
7054 && (i != FPSCR_REG || ! skip_fpscr)
7055 && TEST_HARD_REG_BIT (*mask, i))
7057 /* If the ISR has the RESBANK attribute assigned, don't push any of
7058 the following registers - R0-R14, MACH, MACL and GBR. */
7059 if (! (sh_cfun_resbank_handler_p ()
7060 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7061 || i == MACH_REG
7062 || i == MACL_REG
7063 || i == GBR_REG)))
7064 push (i);
7068 /* Push banked registers last to improve delay slot opportunities. */
7069 if (interrupt_handler)
7071 bool use_movml = false;
7073 if (TARGET_SH2A)
7075 unsigned int count = 0;
7077 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7078 if (TEST_HARD_REG_BIT (*mask, i))
7079 count++;
7080 else
7081 break;
7083 /* Use movml when all banked registers are pushed. */
7084 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7085 use_movml = true;
7088 if (sh_cfun_resbank_handler_p ())
7089 ; /* Do nothing. */
7090 else if (use_movml)
7092 rtx x, mem, reg, set;
7093 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7095 /* We must avoid scheduling the multiple-store insn together with
7096 other insns. */
7097 emit_insn (gen_blockage ());
7098 x = gen_movml_push_banked (sp_reg);
7099 x = frame_insn (x);
7100 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7102 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7103 reg = gen_rtx_REG (SImode, i);
7104 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7107 set = gen_rtx_SET (SImode, sp_reg,
7108 plus_constant (Pmode, sp_reg, - 32));
7109 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7110 emit_insn (gen_blockage ());
7112 else
7113 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7114 if (TEST_HARD_REG_BIT (*mask, i))
7115 push (i);
7118 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7119 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7120 push (PR_REG);
7123 /* Calculate how much extra space is needed to save all callee-saved
7124 target registers.
7125 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7126 static int
7127 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7129 int reg;
7130 int stack_space = 0;
7131 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7133 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7134 if ((! call_really_used_regs[reg] || interrupt_handler)
7135 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7136 /* Leave space to save this target register on the stack,
7137 in case target register allocation wants to use it. */
7138 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7139 return stack_space;
7142 /* Decide whether we should reserve space for callee-save target registers,
7143 in case target register allocation wants to use them. REGS_SAVED is
7144 the space, in bytes, that is already required for register saves.
7145 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7146 static int
7147 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7148 HARD_REG_SET *live_regs_mask)
7150 if (optimize_size)
7151 return 0;
7152 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7155 /* Decide how much space to reserve for callee-save target registers
7156 in case target register allocation wants to use them.
7157 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7158 static int
7159 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7161 if (shmedia_space_reserved_for_target_registers)
7162 return shmedia_target_regs_stack_space (live_regs_mask);
7163 else
7164 return 0;
7167 /* Work out the registers which need to be saved, both as a mask and a
7168 count of bytes needed to save them. Return the count.
7170 If doing a pragma interrupt function, then push all regs used by the
7171 function, and if we call another function (we can tell by looking at PR),
7172 make sure that all the regs it clobbers are safe too. */
7173 static int
7174 calc_live_regs (HARD_REG_SET *live_regs_mask)
7176 unsigned int reg;
7177 int count;
7178 tree attrs;
7179 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7180 bool nosave_low_regs;
7181 int pr_live, has_call;
7183 attrs = DECL_ATTRIBUTES (current_function_decl);
7184 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7185 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7186 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7187 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7189 CLEAR_HARD_REG_SET (*live_regs_mask);
7190 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7191 && df_regs_ever_live_p (FPSCR_REG))
7192 target_flags &= ~MASK_FPU_SINGLE;
7193 /* If we can avoid a lot of saves by switching to double mode, do that. */
7194 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7195 && TARGET_FPU_SINGLE)
7196 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7197 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7198 && (! call_really_used_regs[reg]
7199 || interrupt_handler)
7200 && ++count > 2)
7202 target_flags &= ~MASK_FPU_SINGLE;
7203 break;
7205 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7206 knows how to use it. That means the pseudo originally allocated for
7207 the initial value can become the PR_MEDIA_REG hard register, as seen for
7208 execute/20010122-1.c:test9. */
7209 if (TARGET_SHMEDIA)
7210 /* ??? this function is called from initial_elimination_offset, hence we
7211 can't use the result of sh_media_register_for_return here. */
7212 pr_live = sh_pr_n_sets ();
7213 else
7215 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7216 pr_live = (pr_initial
7217 ? (!REG_P (pr_initial)
7218 || REGNO (pr_initial) != (PR_REG))
7219 : df_regs_ever_live_p (PR_REG));
7220 /* For SHcompact, if not optimizing, we end up with a memory reference
7221 using the return address pointer for __builtin_return_address even
7222 though there is no actual need to put the PR register on the stack. */
7223 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7225 /* Force PR to be live if the prologue has to call the SHmedia
7226 argument decoder or register saver. */
7227 if (TARGET_SHCOMPACT
7228 && ((crtl->args.info.call_cookie
7229 & ~ CALL_COOKIE_RET_TRAMP (1))
7230 || crtl->saves_all_registers))
7231 pr_live = 1;
7232 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7233 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7235 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7236 ? pr_live
7237 : interrupt_handler
7238 ? (/* Need to save all the regs ever live. */
7239 (df_regs_ever_live_p (reg)
7240 || (call_really_used_regs[reg]
7241 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7242 || reg == PIC_OFFSET_TABLE_REGNUM)
7243 && has_call)
7244 || (TARGET_SHMEDIA && has_call
7245 && REGISTER_NATURAL_MODE (reg) == SImode
7246 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7247 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7248 && reg != RETURN_ADDRESS_POINTER_REGNUM
7249 && reg != T_REG && reg != GBR_REG
7250 /* Push fpscr only on targets which have an FPU. */
7251 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7252 : (/* Only push those regs which are used and need to be saved. */
7253 (TARGET_SHCOMPACT
7254 && flag_pic
7255 && crtl->args.info.call_cookie
7256 && reg == PIC_OFFSET_TABLE_REGNUM)
7257 || (df_regs_ever_live_p (reg)
7258 && ((!call_really_used_regs[reg]
7259 && !(reg != PIC_OFFSET_TABLE_REGNUM
7260 && fixed_regs[reg] && call_used_regs[reg]))
7261 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7262 || (crtl->calls_eh_return
7263 && (reg == EH_RETURN_DATA_REGNO (0)
7264 || reg == EH_RETURN_DATA_REGNO (1)
7265 || reg == EH_RETURN_DATA_REGNO (2)
7266 || reg == EH_RETURN_DATA_REGNO (3)))
7267 || ((reg == MACL_REG || reg == MACH_REG)
7268 && df_regs_ever_live_p (reg)
7269 && sh_cfun_attr_renesas_p ())
7272 SET_HARD_REG_BIT (*live_regs_mask, reg);
7273 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7275 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7276 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7278 if (FP_REGISTER_P (reg))
7280 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7282 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7283 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7286 else if (XD_REGISTER_P (reg))
7288 /* Must switch to double mode to access these registers. */
7289 target_flags &= ~MASK_FPU_SINGLE;
7293 if (nosave_low_regs && reg == R8_REG)
7294 break;
7296 /* If we have a target register optimization pass after prologue / epilogue
7297 threading, we need to assume all target registers will be live even if
7298 they aren't now. */
7299 if (flag_branch_target_load_optimize2
7300 && TARGET_SAVE_ALL_TARGET_REGS
7301 && shmedia_space_reserved_for_target_registers)
7302 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7303 if ((! call_really_used_regs[reg] || interrupt_handler)
7304 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7306 SET_HARD_REG_BIT (*live_regs_mask, reg);
7307 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7309 /* If this is an interrupt handler, we don't have any call-clobbered
7310 registers we can conveniently use for target register save/restore.
7311 Make sure we save at least one general purpose register when we need
7312 to save target registers. */
7313 if (interrupt_handler
7314 && hard_reg_set_intersect_p (*live_regs_mask,
7315 reg_class_contents[TARGET_REGS])
7316 && ! hard_reg_set_intersect_p (*live_regs_mask,
7317 reg_class_contents[GENERAL_REGS]))
7319 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7320 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7323 return count;
7326 /* Code to generate prologue and epilogue sequences */
7328 /* PUSHED is the number of bytes that are being pushed on the
7329 stack for register saves. Return the frame size, padded
7330 appropriately so that the stack stays properly aligned. */
7331 static HOST_WIDE_INT
7332 rounded_frame_size (int pushed)
7334 HOST_WIDE_INT size = get_frame_size ();
7335 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7337 if (ACCUMULATE_OUTGOING_ARGS)
7338 size += crtl->outgoing_args_size;
7340 return ((size + pushed + align - 1) & -align) - pushed;
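/* Editor's note (illustrative, not part of the original sources): a worked
   example of the rounding above, assuming STACK_BOUNDARY / BITS_PER_UNIT
   == 8.  With a raw frame size of 20 bytes and PUSHED == 12:
   ((20 + 12 + 7) & -8) - 12 == 32 - 12 == 20, so the register saves plus the
   returned frame size come to 32 bytes, a multiple of the alignment.  */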
7343 /* Choose a call-clobbered target-branch register that remains
7344 unchanged along the whole function. We set it up as the return
7345 value in the prologue. */
7346 int
7347 sh_media_register_for_return (void)
7349 int regno;
7350 int tr0_used;
7352 if (! crtl->is_leaf)
7353 return -1;
7354 if (lookup_attribute ("interrupt_handler",
7355 DECL_ATTRIBUTES (current_function_decl)))
7356 return -1;
7357 if (sh_cfun_interrupt_handler_p ())
7358 return -1;
7360 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7362 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7363 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7364 return regno;
7366 return -1;
7369 /* The maximum number of registers we need to save is:
7370 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7371 - 32 floating point registers (for each pair, we save none,
7372 one single precision value, or a double precision value).
7373 - 8 target registers
7374 - add 1 entry for a delimiter. */
7375 #define MAX_SAVED_REGS (62+32+8)
7377 typedef struct save_entry_s
7379 unsigned char reg;
7380 unsigned char mode;
7381 short offset;
7382 } save_entry;
7384 #define MAX_TEMPS 4
7386 /* There will be a delimiter entry with VOIDmode both at the start and the
7387 end of a filled-in schedule. The end delimiter has the offset of the
7388 save with the smallest (i.e. most negative) offset. */
7389 typedef struct save_schedule_s
7391 save_entry entries[MAX_SAVED_REGS + 2];
7392 int temps[MAX_TEMPS+1];
7393 } save_schedule;
7395 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7396 use reverse order. Returns the last entry written to (not counting
7397 the delimiter). OFFSET_BASE is a number to be added to all offset
7398 entries. */
7399 static save_entry *
7400 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7401 int offset_base)
7403 int align, i;
7404 save_entry *entry = schedule->entries;
7405 int tmpx = 0;
7406 int offset;
7408 if (! current_function_interrupt)
7409 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7410 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7411 && ! FUNCTION_ARG_REGNO_P (i)
7412 && i != FIRST_RET_REG
7413 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7414 && ! (crtl->calls_eh_return
7415 && (i == EH_RETURN_STACKADJ_REGNO
7416 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7417 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7418 schedule->temps[tmpx++] = i;
7419 entry->reg = -1;
7420 entry->mode = VOIDmode;
7421 entry->offset = offset_base;
7422 entry++;
7423 /* We loop twice: first, we save 8-byte aligned registers in the
7424 higher addresses, which are known to be aligned. Then, we
7425 proceed to saving 32-bit registers that don't need 8-byte
7426 alignment.
7427 If this is an interrupt function, all registers that need saving
7428 need to be saved in full. Moreover, we need to postpone saving
7429 target registers till we have saved some general purpose registers
7430 we can then use as scratch registers. */
7431 offset = offset_base;
7432 for (align = 1; align >= 0; align--)
7434 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7435 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7437 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7438 int reg = i;
7440 if (current_function_interrupt)
7442 if (TARGET_REGISTER_P (i))
7443 continue;
7444 if (GENERAL_REGISTER_P (i))
7445 mode = DImode;
7447 if (mode == SFmode && (i % 2) == 1
7448 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7449 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7451 mode = DFmode;
7452 i--;
7453 reg--;
7456 /* If we're doing the aligned pass and this is not aligned,
7457 or we're doing the unaligned pass and this is aligned,
7458 skip it. */
7459 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7460 != align)
7461 continue;
7463 if (current_function_interrupt
7464 && GENERAL_REGISTER_P (i)
7465 && tmpx < MAX_TEMPS)
7466 schedule->temps[tmpx++] = i;
7468 offset -= GET_MODE_SIZE (mode);
7469 entry->reg = i;
7470 entry->mode = mode;
7471 entry->offset = offset;
7472 entry++;
7474 if (align && current_function_interrupt)
7475 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7476 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7478 offset -= GET_MODE_SIZE (DImode);
7479 entry->reg = i;
7480 entry->mode = DImode;
7481 entry->offset = offset;
7482 entry++;
7485 entry->reg = -1;
7486 entry->mode = VOIDmode;
7487 entry->offset = offset;
7488 schedule->temps[tmpx] = -1;
7489 return entry - 1;
7492 /* Expand code for the function prologue. */
7493 void
7494 sh_expand_prologue (void)
7496 HARD_REG_SET live_regs_mask;
7497 int d, i;
7498 int d_rounding = 0;
7499 int save_flags = target_flags;
7500 int pretend_args;
7501 int stack_usage;
7502 tree sp_switch_attr
7503 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7505 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7507 /* We have pretend args if we had an object sent partially in registers
7508 and partially on the stack, e.g. a large structure. */
7509 pretend_args = crtl->args.pretend_args_size;
7510 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7511 && (NPARM_REGS(SImode)
7512 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7513 pretend_args = 0;
7515 output_stack_adjust (-pretend_args
7516 - crtl->args.info.stack_regs * 8,
7517 stack_pointer_rtx, 0, NULL, true);
7518 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7520 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7521 /* We're going to use the PIC register to load the address of the
7522 incoming-argument decoder and/or of the return trampoline from
7523 the GOT, so make sure the PIC register is preserved and
7524 initialized. */
7525 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7527 if (TARGET_SHCOMPACT
7528 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7530 int reg;
7532 /* First, make all registers with incoming arguments that will
7533 be pushed onto the stack live, so that register renaming
7534 doesn't overwrite them. */
7535 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7536 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7537 >= NPARM_REGS (SImode) - reg)
7538 for (; reg < NPARM_REGS (SImode); reg++)
7539 emit_insn (gen_shcompact_preserve_incoming_args
7540 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7541 else if (CALL_COOKIE_INT_REG_GET
7542 (crtl->args.info.call_cookie, reg) == 1)
7543 emit_insn (gen_shcompact_preserve_incoming_args
7544 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7546 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7547 stack_pointer_rtx);
7548 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7549 GEN_INT (crtl->args.info.call_cookie));
7550 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7551 gen_rtx_REG (SImode, R0_REG));
7553 else if (TARGET_SHMEDIA)
7555 int tr = sh_media_register_for_return ();
7557 if (tr >= 0)
7558 emit_move_insn (gen_rtx_REG (DImode, tr),
7559 gen_rtx_REG (DImode, PR_MEDIA_REG));
7562 /* Emit the code for SETUP_VARARGS. */
7563 if (cfun->stdarg)
7565 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7568 /* Push arg regs as if they'd been provided by the caller on the stack. */
7568 for (i = 0; i < NPARM_REGS(SImode); i++)
7570 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7572 if (i >= (NPARM_REGS(SImode)
7573 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7575 break;
7576 push (rn);
7577 stack_usage += GET_MODE_SIZE (SImode);
7582 /* If we're supposed to switch stacks at function entry, do so now. */
7583 if (sp_switch_attr)
7585 rtx lab, newsrc;
7586 /* The argument specifies a variable holding the address of the
7587 stack the interrupt function should switch to/from at entry/exit. */
7588 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7589 const char *s
7590 = ggc_strdup (TREE_STRING_POINTER (arg));
7591 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7593 lab = add_constant (sp_switch, SImode, 0);
7594 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7596 emit_insn (gen_sp_switch_1 (newsrc));
7599 d = calc_live_regs (&live_regs_mask);
7600 /* ??? Maybe we could save some switching if we can move a mode switch
7601 that already happens to be at the function start into the prologue. */
7602 if (target_flags != save_flags && ! current_function_interrupt)
7603 emit_insn (gen_toggle_sz ());
7605 if (TARGET_SH5)
7607 int offset_base, offset;
7608 rtx r0 = NULL_RTX;
7609 int offset_in_r0 = -1;
7610 int sp_in_r0 = 0;
7611 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7612 int total_size, save_size;
7613 save_schedule schedule;
7614 save_entry *entry;
7615 int *tmp_pnt;
7617 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7618 && ! current_function_interrupt)
7619 r0 = gen_rtx_REG (Pmode, R0_REG);
7621 /* D is the actual number of bytes that we need for saving registers,
7622 however, in initial_elimination_offset we have committed to using
7623 an additional TREGS_SPACE amount of bytes - in order to keep both
7624 addresses to arguments supplied by the caller and local variables
7625 valid, we must keep this gap. Place it between the incoming
7626 arguments and the actually saved registers in a bid to optimize
7627 locality of reference. */
7628 total_size = d + tregs_space;
7629 total_size += rounded_frame_size (total_size);
7630 save_size = total_size - rounded_frame_size (d);
7631 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7632 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7633 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7635 /* If adjusting the stack in a single step costs nothing extra, do so.
7636 I.e. either if a single addi is enough, or we need a movi anyway,
7637 and we don't exceed the maximum offset range (the test for the
7638 latter is conservative for simplicity). */
7639 if (TARGET_SHMEDIA
7640 && (CONST_OK_FOR_I10 (-total_size)
7641 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7642 && total_size <= 2044)))
7643 d_rounding = total_size - save_size;
7645 offset_base = d + d_rounding;
7647 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7648 0, NULL, true);
7649 stack_usage += save_size + d_rounding;
7651 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7652 tmp_pnt = schedule.temps;
7653 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7655 enum machine_mode mode = (enum machine_mode) entry->mode;
7656 unsigned int reg = entry->reg;
7657 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7658 rtx orig_reg_rtx;
7660 offset = entry->offset;
7662 reg_rtx = gen_rtx_REG (mode, reg);
7664 mem_rtx = gen_frame_mem (mode,
7665 gen_rtx_PLUS (Pmode,
7666 stack_pointer_rtx,
7667 GEN_INT (offset)));
7669 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7671 gcc_assert (r0);
7672 mem_rtx = NULL_RTX;
7675 if (HAVE_PRE_DECREMENT
7676 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7677 || mem_rtx == NULL_RTX
7678 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7680 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7682 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7683 pre_dec = NULL_RTX;
7684 else
7686 mem_rtx = NULL_RTX;
7687 offset += GET_MODE_SIZE (mode);
7691 if (mem_rtx != NULL_RTX)
7692 goto addr_ok;
7694 if (offset_in_r0 == -1)
7696 emit_move_insn (r0, GEN_INT (offset));
7697 offset_in_r0 = offset;
7699 else if (offset != offset_in_r0)
7701 emit_move_insn (r0,
7702 gen_rtx_PLUS
7703 (Pmode, r0,
7704 GEN_INT (offset - offset_in_r0)));
7705 offset_in_r0 += offset - offset_in_r0;
7708 if (pre_dec != NULL_RTX)
7710 if (! sp_in_r0)
7712 emit_move_insn (r0,
7713 gen_rtx_PLUS
7714 (Pmode, r0, stack_pointer_rtx));
7715 sp_in_r0 = 1;
7718 offset -= GET_MODE_SIZE (mode);
7719 offset_in_r0 -= GET_MODE_SIZE (mode);
7721 mem_rtx = pre_dec;
7723 else if (sp_in_r0)
7724 mem_rtx = gen_frame_mem (mode, r0);
7725 else
7726 mem_rtx = gen_frame_mem (mode,
7727 gen_rtx_PLUS (Pmode,
7728 stack_pointer_rtx,
7729 r0));
7731 /* We must not use an r0-based address for target-branch
7732 registers or for special registers without pre-dec
7733 memory addresses, since we store their values in r0
7734 first. */
7735 gcc_assert (!TARGET_REGISTER_P (reg)
7736 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7737 || mem_rtx == pre_dec));
7739 addr_ok:
7740 orig_reg_rtx = reg_rtx;
7741 if (TARGET_REGISTER_P (reg)
7742 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7743 && mem_rtx != pre_dec))
7745 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7747 emit_move_insn (tmp_reg, reg_rtx);
7749 if (REGNO (tmp_reg) == R0_REG)
7751 offset_in_r0 = -1;
7752 sp_in_r0 = 0;
7753 gcc_assert (!refers_to_regno_p
7754 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7757 if (*++tmp_pnt <= 0)
7758 tmp_pnt = schedule.temps;
7760 reg_rtx = tmp_reg;
7763 rtx insn;
7765 /* Mark as interesting for dwarf cfi generator */
7766 insn = emit_move_insn (mem_rtx, reg_rtx);
7767 RTX_FRAME_RELATED_P (insn) = 1;
7768 /* If we use an intermediate register for the save, we can't
7769 describe this exactly in cfi as a copy of the to-be-saved
7770 register into the temporary register and then the temporary
7771 register on the stack, because the temporary register can
7772 have a different natural size than the to-be-saved register.
7773 Thus, we gloss over the intermediate copy and pretend we do
7774 a direct save from the to-be-saved register. */
7775 if (REGNO (reg_rtx) != reg)
7777 rtx set;
7779 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7780 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7783 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7785 rtx reg_rtx = gen_rtx_REG (mode, reg);
7786 rtx set;
7787 rtx mem_rtx = gen_frame_mem (mode,
7788 gen_rtx_PLUS (Pmode,
7789 stack_pointer_rtx,
7790 GEN_INT (offset)));
7792 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7793 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7798 gcc_assert (entry->offset == d_rounding);
7800 else
7802 push_regs (&live_regs_mask, current_function_interrupt);
7803 stack_usage += d;
7806 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7807 emit_insn (gen_GOTaddr2picreg ());
7809 if (SHMEDIA_REGS_STACK_ADJUST ())
7811 /* This must NOT go through the PLT, otherwise mach and macl
7812 may be clobbered. */
7813 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7814 (TARGET_FPU_ANY
7815 ? "__GCC_push_shmedia_regs"
7816 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7817 emit_insn (gen_shmedia_save_restore_regs_compact
7818 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7821 if (target_flags != save_flags && ! current_function_interrupt)
7822 emit_insn (gen_toggle_sz ());
7824 target_flags = save_flags;
7826 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7827 stack_pointer_rtx, 0, NULL, true);
7828 stack_usage += rounded_frame_size (d) - d_rounding;
7830 if (frame_pointer_needed)
7831 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7833 if (TARGET_SHCOMPACT
7834 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7836 /* This must NOT go through the PLT, otherwise mach and macl
7837 may be clobbered. */
7838 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7839 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7840 emit_insn (gen_shcompact_incoming_args ());
7843 /* If we are profiling, make sure no instructions are scheduled before
7844 the call to mcount. Similarly if some call instructions are swapped
7845 before frame related insns, it'll confuse the unwinder because
7846 currently SH has no unwind info for function epilogues. */
7847 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7848 emit_insn (gen_blockage ());
7850 if (flag_stack_usage_info)
7851 current_function_static_stack_size = stack_usage;
7854 /* Expand code for the function epilogue. */
7855 void
7856 sh_expand_epilogue (bool sibcall_p)
7858 HARD_REG_SET live_regs_mask;
7859 int d, i;
7860 int d_rounding = 0;
7862 int save_flags = target_flags;
7863 int frame_size, save_size;
7864 int fpscr_deferred = 0;
7865 int e = sibcall_p ? -1 : 1;
7867 d = calc_live_regs (&live_regs_mask);
7869 save_size = d;
7870 frame_size = rounded_frame_size (d);
7872 if (TARGET_SH5)
7874 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7875 int total_size;
7876 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7877 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7878 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7880 total_size = d + tregs_space;
7881 total_size += rounded_frame_size (total_size);
7882 save_size = total_size - frame_size;
7884 /* If adjusting the stack in a single step costs nothing extra, do so.
7885 I.e. either if a single addi is enough, or we need a movi anyway,
7886 and we don't exceed the maximum offset range (the test for the
7887 latter is conservative for simplicity). */
7888 if (TARGET_SHMEDIA
7889 && ! frame_pointer_needed
7890 && (CONST_OK_FOR_I10 (total_size)
7891 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7892 && total_size <= 2044)))
7893 d_rounding = frame_size;
7895 frame_size -= d_rounding;
7898 if (frame_pointer_needed)
7900 /* We must avoid scheduling the epilogue with previous basic blocks.
7901 See PR/18032 and PR/40313. */
7902 emit_insn (gen_blockage ());
7903 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7904 &live_regs_mask, true);
7906 /* We must avoid moving the stack pointer adjustment past code
7907 which reads from the local frame, else an interrupt could
7908 occur after the SP adjustment and clobber data in the local
7909 frame. */
7910 emit_insn (gen_blockage ());
7911 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7913 else if (frame_size)
7915 /* We must avoid moving the stack pointer adjustment past code
7916 which reads from the local frame, else an interrupt could
7917 occur after the SP adjustment and clobber data in the local
7918 frame. */
7919 emit_insn (gen_blockage ());
7920 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7921 &live_regs_mask, true);
7924 if (SHMEDIA_REGS_STACK_ADJUST ())
7926 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7927 (TARGET_FPU_ANY
7928 ? "__GCC_pop_shmedia_regs"
7929 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7930 /* This must NOT go through the PLT, otherwise mach and macl
7931 may be clobbered. */
7932 emit_insn (gen_shmedia_save_restore_regs_compact
7933 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7936 /* Pop all the registers. */
7938 if (target_flags != save_flags && ! current_function_interrupt)
7939 emit_insn (gen_toggle_sz ());
7940 if (TARGET_SH5)
7942 int offset_base, offset;
7943 int offset_in_r0 = -1;
7944 int sp_in_r0 = 0;
7945 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7946 save_schedule schedule;
7947 save_entry *entry;
7948 int *tmp_pnt;
7950 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7951 offset_base = -entry[1].offset + d_rounding;
7952 tmp_pnt = schedule.temps;
7953 for (; entry->mode != VOIDmode; entry--)
7955 enum machine_mode mode = (enum machine_mode) entry->mode;
7956 int reg = entry->reg;
7957 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7959 offset = offset_base + entry->offset;
7960 reg_rtx = gen_rtx_REG (mode, reg);
7962 mem_rtx = gen_frame_mem (mode,
7963 gen_rtx_PLUS (Pmode,
7964 stack_pointer_rtx,
7965 GEN_INT (offset)));
7967 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7968 mem_rtx = NULL_RTX;
7970 if (HAVE_POST_INCREMENT
7971 && (offset == offset_in_r0
7972 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7973 && mem_rtx == NULL_RTX)
7974 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7976 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7978 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7979 post_inc = NULL_RTX;
7980 else
7981 mem_rtx = NULL_RTX;
7984 if (mem_rtx != NULL_RTX)
7985 goto addr_ok;
7987 if (offset_in_r0 == -1)
7989 emit_move_insn (r0, GEN_INT (offset));
7990 offset_in_r0 = offset;
7992 else if (offset != offset_in_r0)
7994 emit_move_insn (r0,
7995 gen_rtx_PLUS
7996 (Pmode, r0,
7997 GEN_INT (offset - offset_in_r0)));
7998 offset_in_r0 += offset - offset_in_r0;
8001 if (post_inc != NULL_RTX)
8003 if (! sp_in_r0)
8005 emit_move_insn (r0,
8006 gen_rtx_PLUS
8007 (Pmode, r0, stack_pointer_rtx));
8008 sp_in_r0 = 1;
8011 mem_rtx = post_inc;
8013 offset_in_r0 += GET_MODE_SIZE (mode);
8015 else if (sp_in_r0)
8016 mem_rtx = gen_frame_mem (mode, r0);
8017 else
8018 mem_rtx = gen_frame_mem (mode,
8019 gen_rtx_PLUS (Pmode,
8020 stack_pointer_rtx,
8021 r0));
8023 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8024 || mem_rtx == post_inc);
8026 addr_ok:
8027 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8028 && mem_rtx != post_inc)
8030 emit_move_insn (r0, mem_rtx);
8031 mem_rtx = r0;
8033 else if (TARGET_REGISTER_P (reg))
8035 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8037 /* Give the scheduler a bit of freedom by using up to
8038 MAX_TEMPS registers in a round-robin fashion. */
8039 emit_move_insn (tmp_reg, mem_rtx);
8040 mem_rtx = tmp_reg;
8041 if (*++tmp_pnt < 0)
8042 tmp_pnt = schedule.temps;
8045 emit_move_insn (reg_rtx, mem_rtx);
8048 gcc_assert (entry->offset + offset_base == d + d_rounding);
8050 else /* ! TARGET_SH5 */
8052 int last_reg;
8054 save_size = 0;
8055 /* For an ISR with RESBANK attribute assigned, don't pop the PR
8056 register. */
8057 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8058 && !sh_cfun_resbank_handler_p ())
8060 if (!frame_pointer_needed)
8061 emit_insn (gen_blockage ());
8062 pop (PR_REG);
8065 /* Banked registers are popped first to avoid being scheduled in the
8066 delay slot. RTE switches banks before the delay slot instruction. */
8067 if (current_function_interrupt)
8069 bool use_movml = false;
8071 if (TARGET_SH2A)
8073 unsigned int count = 0;
8075 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8076 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8077 count++;
8078 else
8079 break;
8081 /* Use movml when all banked registers are popped. */
8082 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8083 use_movml = true;
8086 if (sh_cfun_resbank_handler_p ())
8087 ; /* Do nothing. */
8088 else if (use_movml)
8090 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8092 /* We must avoid scheduling multiple load insns together with
8093 other insns. */
8094 emit_insn (gen_blockage ());
8095 emit_insn (gen_movml_pop_banked (sp_reg));
8096 emit_insn (gen_blockage ());
8098 else
8099 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8100 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8101 pop (i);
8103 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8105 else
8106 last_reg = FIRST_PSEUDO_REGISTER;
8108 for (i = 0; i < last_reg; i++)
8110 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8112 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8113 && hard_reg_set_intersect_p (live_regs_mask,
8114 reg_class_contents[DF_REGS]))
8115 fpscr_deferred = 1;
8116 /* For an ISR with RESBANK attribute assigned, don't pop
8117 the following registers: R0-R14, MACH, MACL and GBR. */
8118 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8119 && ! (sh_cfun_resbank_handler_p ()
8120 && ((j >= FIRST_GENERAL_REG
8121 && j < LAST_GENERAL_REG)
8122 || j == MACH_REG
8123 || j == MACL_REG
8124 || j == GBR_REG)))
8125 pop (j);
8127 if (j == FIRST_FP_REG && fpscr_deferred)
8128 pop (FPSCR_REG);
8131 if (target_flags != save_flags && ! current_function_interrupt)
8132 emit_insn (gen_toggle_sz ());
8133 target_flags = save_flags;
8135 output_stack_adjust (crtl->args.pretend_args_size
8136 + save_size + d_rounding
8137 + crtl->args.info.stack_regs * 8,
8138 stack_pointer_rtx, e, NULL, true);
8140 if (crtl->calls_eh_return)
8141 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8142 EH_RETURN_STACKADJ_RTX));
8144 /* Switch back to the normal stack if necessary. */
8145 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8146 emit_insn (gen_sp_switch_2 ());
8148 /* Tell flow the insn that pops PR isn't dead. */
8149 /* PR_REG will never be live in SHmedia mode, and we don't need to
8150 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8151 by the return pattern. */
8152 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8153 emit_use (gen_rtx_REG (SImode, PR_REG));
8156 /* Emit code to change the current function's return address to RA.
8157 TEMP is available as a scratch register, if needed. */
8158 void
8159 sh_set_return_address (rtx ra, rtx tmp)
8161 HARD_REG_SET live_regs_mask;
8162 int d;
8163 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8164 int pr_offset;
8166 d = calc_live_regs (&live_regs_mask);
8168 /* If pr_reg isn't live, we can set it (or the register given in
8169 sh_media_register_for_return) directly. */
8170 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8172 rtx rr;
8174 if (TARGET_SHMEDIA)
8176 int rr_regno = sh_media_register_for_return ();
8178 if (rr_regno < 0)
8179 rr_regno = pr_reg;
8181 rr = gen_rtx_REG (DImode, rr_regno);
8183 else
8184 rr = gen_rtx_REG (SImode, pr_reg);
8186 emit_insn (GEN_MOV (rr, ra));
8187 /* Tell flow the register for return isn't dead. */
8188 emit_use (rr);
8189 return;
8192 if (TARGET_SH5)
8194 int offset;
8195 save_schedule schedule;
8196 save_entry *entry;
8198 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8199 offset = entry[1].offset;
8200 for (; entry->mode != VOIDmode; entry--)
8201 if (entry->reg == pr_reg)
8202 goto found;
8204 /* We could not find the PR register. */
8205 gcc_unreachable ();
8207 found:
8208 offset = entry->offset - offset;
8209 pr_offset = (rounded_frame_size (d) + offset
8210 + SHMEDIA_REGS_STACK_ADJUST ());
8212 else
8213 pr_offset = rounded_frame_size (d);
8215 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8217 if (frame_pointer_needed)
8218 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8219 else
8220 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8222 tmp = gen_frame_mem (Pmode, tmp);
8223 emit_insn (GEN_MOV (tmp, ra));
8224 /* Tell flow that this store isn't dead. */
8225 emit_use (tmp);
8228 /* Clear variables at function end. */
8229 static void
8230 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8231 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8235 static rtx
8236 sh_builtin_saveregs (void)
8238 /* First unnamed integer register. */
8239 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8240 /* Number of integer registers we need to save. */
8241 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8242 /* First unnamed SFmode float reg */
8243 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8244 /* Number of SFmode float regs to save. */
8245 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8246 rtx regbuf, fpregs;
8247 int bufsize, regno;
8248 alias_set_type alias_set;
8250 if (TARGET_SH5)
8252 if (n_intregs)
8254 int pushregs = n_intregs;
8256 while (pushregs < NPARM_REGS (SImode) - 1
8257 && (CALL_COOKIE_INT_REG_GET
8258 (crtl->args.info.call_cookie,
8259 NPARM_REGS (SImode) - pushregs)
8260 == 1))
8262 crtl->args.info.call_cookie
8263 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8264 - pushregs, 1);
8265 pushregs++;
8268 if (pushregs == NPARM_REGS (SImode))
8269 crtl->args.info.call_cookie
8270 |= (CALL_COOKIE_INT_REG (0, 1)
8271 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8272 else
8273 crtl->args.info.call_cookie
8274 |= CALL_COOKIE_STACKSEQ (pushregs);
8276 crtl->args.pretend_args_size += 8 * n_intregs;
8278 if (TARGET_SHCOMPACT)
8279 return const0_rtx;
8282 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8284 error ("__builtin_saveregs not supported by this subtarget");
8285 return const0_rtx;
8288 if (TARGET_SHMEDIA)
8289 n_floatregs = 0;
8291 /* Allocate block of memory for the regs. */
8292 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8293 Or can assign_stack_local accept a 0 SIZE argument? */
8294 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
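/* Illustrative example (not from the original source), assuming
   UNITS_PER_WORD == 4: with n_floatregs == 4 and n_intregs == 2,
   bufsize is 24; the float registers are stored at offsets 0..15 of
   regbuf and the integer registers at offsets 16..23, since the
   integer block below is placed n_floatregs * UNITS_PER_WORD bytes
   into the buffer.  */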
8296 if (TARGET_SHMEDIA)
8297 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8298 else if (n_floatregs & 1)
8300 rtx addr;
8302 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8303 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8304 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8305 regbuf = change_address (regbuf, BLKmode, addr);
8307 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8309 rtx addr, mask;
8311 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8312 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8313 XEXP (regbuf, 0), 4));
8314 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8315 emit_insn (gen_andsi3 (addr, addr, mask));
8316 regbuf = change_address (regbuf, BLKmode, addr);
8318 else
8319 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8320 alias_set = get_varargs_alias_set ();
8321 set_mem_alias_set (regbuf, alias_set);
8323 /* Save int args.
8324 This is optimized to only save the regs that are necessary. Explicitly
8325 named args need not be saved. */
8326 if (n_intregs > 0)
8327 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8328 adjust_address (regbuf, BLKmode,
8329 n_floatregs * UNITS_PER_WORD),
8330 n_intregs);
8332 if (TARGET_SHMEDIA)
8333 /* Return the address of the regbuf. */
8334 return XEXP (regbuf, 0);
8336 /* Save float args.
8337 This is optimized to only save the regs that are necessary. Explicitly
8338 named args need not be saved.
8339 We explicitly build a pointer to the buffer because it halves the insn
8340 count when not optimizing (otherwise the pointer is built for each reg
8341 saved).
8342 We emit the moves in reverse order so that we can use predecrement. */
8344 fpregs = copy_to_mode_reg (Pmode,
8345 plus_constant (Pmode, XEXP (regbuf, 0),
8346 n_floatregs * UNITS_PER_WORD));
8347 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8349 rtx mem;
8350 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8352 emit_insn (gen_addsi3 (fpregs, fpregs,
8353 GEN_INT (-2 * UNITS_PER_WORD)));
8354 mem = change_address (regbuf, DFmode, fpregs);
8355 emit_move_insn (mem,
8356 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8358 regno = first_floatreg;
8359 if (regno & 1)
8361 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8362 mem = change_address (regbuf, SFmode, fpregs);
8363 emit_move_insn (mem,
8364 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
8365 - (TARGET_LITTLE_ENDIAN != 0)));
8368 else
8369 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8371 rtx mem;
8373 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8374 mem = change_address (regbuf, SFmode, fpregs);
8375 emit_move_insn (mem,
8376 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8379 /* Return the address of the regbuf. */
8380 return XEXP (regbuf, 0);
8383 /* Define the `__builtin_va_list' type for the ABI. */
8384 static tree
8385 sh_build_builtin_va_list (void)
8387 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8388 tree record, type_decl;
8390 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8391 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8392 return ptr_type_node;
8394 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8395 type_decl = build_decl (BUILTINS_LOCATION,
8396 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8398 f_next_o = build_decl (BUILTINS_LOCATION,
8399 FIELD_DECL, get_identifier ("__va_next_o"),
8400 ptr_type_node);
8401 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8402 FIELD_DECL,
8403 get_identifier ("__va_next_o_limit"),
8404 ptr_type_node);
8405 f_next_fp = build_decl (BUILTINS_LOCATION,
8406 FIELD_DECL, get_identifier ("__va_next_fp"),
8407 ptr_type_node);
8408 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8409 FIELD_DECL,
8410 get_identifier ("__va_next_fp_limit"),
8411 ptr_type_node);
8412 f_next_stack = build_decl (BUILTINS_LOCATION,
8413 FIELD_DECL, get_identifier ("__va_next_stack"),
8414 ptr_type_node);
8416 DECL_FIELD_CONTEXT (f_next_o) = record;
8417 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8418 DECL_FIELD_CONTEXT (f_next_fp) = record;
8419 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8420 DECL_FIELD_CONTEXT (f_next_stack) = record;
8422 TYPE_STUB_DECL (record) = type_decl;
8423 TYPE_NAME (record) = type_decl;
8424 TYPE_FIELDS (record) = f_next_o;
8425 DECL_CHAIN (f_next_o) = f_next_o_limit;
8426 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8427 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8428 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8430 layout_type (record);
8432 return record;
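/* Illustrative only (not part of the original source): the record built
   above corresponds roughly to

     struct __va_list_tag
     {
       void *__va_next_o;         // next integer arg in the save area
       void *__va_next_o_limit;   // end of the integer save area
       void *__va_next_fp;        // next FP arg in the save area
       void *__va_next_fp_limit;  // end of the FP save area
       void *__va_next_stack;     // next argument passed on the stack
     };  */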
8435 /* Implement `va_start' for varargs and stdarg. */
8436 static void
8437 sh_va_start (tree valist, rtx nextarg)
8439 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8440 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8441 tree t, u;
8442 int nfp, nint;
8444 if (TARGET_SH5)
8446 expand_builtin_saveregs ();
8447 std_expand_builtin_va_start (valist, nextarg);
8448 return;
8451 if ((! TARGET_SH2E && ! TARGET_SH4)
8452 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8454 std_expand_builtin_va_start (valist, nextarg);
8455 return;
8458 f_next_o = TYPE_FIELDS (va_list_type_node);
8459 f_next_o_limit = DECL_CHAIN (f_next_o);
8460 f_next_fp = DECL_CHAIN (f_next_o_limit);
8461 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8462 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8464 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8465 NULL_TREE);
8466 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8467 valist, f_next_o_limit, NULL_TREE);
8468 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8469 NULL_TREE);
8470 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8471 valist, f_next_fp_limit, NULL_TREE);
8472 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8473 valist, f_next_stack, NULL_TREE);
8475 /* Call __builtin_saveregs. */
8476 u = make_tree (sizetype, expand_builtin_saveregs ());
8477 u = fold_convert (ptr_type_node, u);
8478 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8479 TREE_SIDE_EFFECTS (t) = 1;
8480 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8482 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8483 if (nfp < 8)
8484 nfp = 8 - nfp;
8485 else
8486 nfp = 0;
8487 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8488 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8489 TREE_SIDE_EFFECTS (t) = 1;
8490 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8492 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8493 TREE_SIDE_EFFECTS (t) = 1;
8494 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8496 nint = crtl->args.info.arg_count[SH_ARG_INT];
8497 if (nint < 4)
8498 nint = 4 - nint;
8499 else
8500 nint = 0;
8501 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8502 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8503 TREE_SIDE_EFFECTS (t) = 1;
8504 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8506 u = make_tree (ptr_type_node, nextarg);
8507 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8508 TREE_SIDE_EFFECTS (t) = 1;
8509 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
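/* Worked example (illustrative, not from the original source), assuming
   UNITS_PER_WORD == 4: for a function whose named parameters consume two
   integer argument registers and one FP argument register, nfp becomes
   8 - 1 == 7 and nint becomes 4 - 2 == 2.  next_fp then points at the
   start of the __builtin_saveregs buffer, next_fp_limit and next_o lie
   28 bytes beyond it, next_o_limit lies 36 bytes beyond it, and
   next_stack is set to NEXTARG, the first stack-passed argument.  */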
8512 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8513 member, return it. */
8514 static tree
8515 find_sole_member (tree type)
8517 tree field, member = NULL_TREE;
8519 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8521 if (TREE_CODE (field) != FIELD_DECL)
8522 continue;
8523 if (!DECL_SIZE (field))
8524 return NULL_TREE;
8525 if (integer_zerop (DECL_SIZE (field)))
8526 continue;
8527 if (member)
8528 return NULL_TREE;
8529 member = field;
8531 return member;
8534 /* Implement `va_arg'. */
8535 static tree
8536 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8537 gimple_seq *post_p ATTRIBUTE_UNUSED)
8539 HOST_WIDE_INT size, rsize;
8540 tree tmp, pptr_type_node;
8541 tree addr, lab_over = NULL, result = NULL;
8542 bool pass_by_ref;
8543 tree eff_type;
8545 if (!VOID_TYPE_P (type))
8546 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8547 else
8548 pass_by_ref = false;
8550 if (pass_by_ref)
8551 type = build_pointer_type (type);
8553 size = int_size_in_bytes (type);
8554 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
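/* Illustrative rounding example (not from the original source), assuming
   UNITS_PER_WORD == 4: a type of size 6 gives rsize == (6 + 3) & -4 == 8,
   i.e. the argument slot is rounded up to a whole number of words.  */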
8555 pptr_type_node = build_pointer_type (ptr_type_node);
8557 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8558 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8560 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8561 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8562 int pass_as_float;
8563 tree lab_false;
8564 tree member;
8566 f_next_o = TYPE_FIELDS (va_list_type_node);
8567 f_next_o_limit = DECL_CHAIN (f_next_o);
8568 f_next_fp = DECL_CHAIN (f_next_o_limit);
8569 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8570 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8572 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8573 NULL_TREE);
8574 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8575 valist, f_next_o_limit, NULL_TREE);
8576 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8577 valist, f_next_fp, NULL_TREE);
8578 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8579 valist, f_next_fp_limit, NULL_TREE);
8580 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8581 valist, f_next_stack, NULL_TREE);
8583 /* Structures with a single member with a distinct mode are passed
8584 like their member. This is relevant if the latter has a REAL_TYPE
8585 or COMPLEX_TYPE type. */
8586 eff_type = type;
8587 while (TREE_CODE (eff_type) == RECORD_TYPE
8588 && (member = find_sole_member (eff_type))
8589 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8590 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8591 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8593 tree field_type = TREE_TYPE (member);
8595 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8596 eff_type = field_type;
8597 else
8599 gcc_assert ((TYPE_ALIGN (eff_type)
8600 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8601 || (TYPE_ALIGN (eff_type)
8602 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8603 break;
8607 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8609 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8610 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8611 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8612 && size <= 16));
8614 else
8616 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8619 addr = create_tmp_var (pptr_type_node, NULL);
8620 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8621 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8623 valist = build_simple_mem_ref (addr);
8625 if (pass_as_float)
8627 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8628 tree cmp;
8629 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8631 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8632 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8634 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8635 tmp = next_fp_limit;
8636 if (size > 4 && !is_double)
8637 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8638 tmp = build2 (GE_EXPR, boolean_type_node,
8639 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8640 cmp = build3 (COND_EXPR, void_type_node, tmp,
8641 build1 (GOTO_EXPR, void_type_node,
8642 unshare_expr (lab_false)), NULL_TREE);
8643 if (!is_double)
8644 gimplify_and_add (cmp, pre_p);
8646 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8647 || (is_double || size == 16))
8649 tmp = fold_convert (sizetype, next_fp_tmp);
8650 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8651 size_int (UNITS_PER_WORD));
8652 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8653 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8655 if (is_double)
8656 gimplify_and_add (cmp, pre_p);
8658 #ifdef FUNCTION_ARG_SCmode_WART
8659 if (TYPE_MODE (eff_type) == SCmode
8660 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8662 tree subtype = TREE_TYPE (eff_type);
8663 tree real, imag;
8665 imag
8666 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8667 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8669 real
8670 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8671 real = get_initialized_tmp_var (real, pre_p, NULL);
8673 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8674 if (type != eff_type)
8675 result = build1 (VIEW_CONVERT_EXPR, type, result);
8676 result = get_initialized_tmp_var (result, pre_p, NULL);
8678 #endif /* FUNCTION_ARG_SCmode_WART */
8680 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8681 gimplify_and_add (tmp, pre_p);
8683 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8684 gimplify_and_add (tmp, pre_p);
8686 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8687 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8688 gimplify_assign (unshare_expr (next_fp_tmp),
8689 unshare_expr (valist), pre_p);
8691 gimplify_assign (unshare_expr (valist),
8692 unshare_expr (next_fp_tmp), post_p);
8693 valist = next_fp_tmp;
8695 else
8697 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8698 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8699 unshare_expr (next_o_limit));
8700 tmp = build3 (COND_EXPR, void_type_node, tmp,
8701 build1 (GOTO_EXPR, void_type_node,
8702 unshare_expr (lab_false)),
8703 NULL_TREE);
8704 gimplify_and_add (tmp, pre_p);
8706 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8707 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8709 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8710 gimplify_and_add (tmp, pre_p);
8712 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8713 gimplify_and_add (tmp, pre_p);
8715 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8716 gimplify_assign (unshare_expr (next_o),
8717 unshare_expr (next_o_limit), pre_p);
8719 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8720 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8723 if (!result)
8725 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8726 gimplify_and_add (tmp, pre_p);
8730 /* ??? In va-sh.h, there had been code to make values larger than
8731 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8733 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8734 if (result)
8736 gimplify_assign (result, tmp, pre_p);
8737 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8738 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8739 gimplify_and_add (tmp, pre_p);
8741 else
8742 result = tmp;
8744 if (pass_by_ref)
8745 result = build_va_arg_indirect_ref (result);
8747 return result;
8750 /* 64-bit floating point memory transfers are paired single precision loads
8751 or stores. So DWARF information needs fixing in little endian (unless
8752 PR=SZ=1 in FPSCR). */
7753 rtx
7754 sh_dwarf_register_span (rtx reg)
8756 unsigned regno = REGNO (reg);
8758 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8759 return NULL_RTX;
8761 return
8762 gen_rtx_PARALLEL (VOIDmode,
8763 gen_rtvec (2,
8764 gen_rtx_REG (SFmode, regno + 1),
8765 gen_rtx_REG (SFmode, regno)));
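/* For instance (illustrative), a DFmode value held in the register pair
   starting at hard register REGNO is described to the DWARF consumer as
   the PARALLEL (REGNO + 1, REGNO), i.e. the two SFmode halves with the
   higher-numbered register listed first.  */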
8768 static enum machine_mode
8769 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8770 int *punsignedp, const_tree funtype,
8771 int for_return)
8773 if (sh_promote_prototypes (funtype))
8774 return promote_mode (type, mode, punsignedp);
8775 else
8776 return default_promote_function_mode (type, mode, punsignedp, funtype,
8777 for_return);
8780 static bool
8781 sh_promote_prototypes (const_tree type)
8783 if (TARGET_HITACHI)
8784 return false;
8785 if (! type)
8786 return true;
8787 return ! sh_attr_renesas_p (type);
8790 /* Whether an argument must be passed by reference. On SHcompact, we
8791 pretend arguments wider than 32 bits that would have been passed in
8792 registers are passed by reference, so that an SHmedia trampoline
8793 loads them into the full 64-bit registers. */
8794 static int
8795 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8796 const_tree type, bool named)
8798 unsigned HOST_WIDE_INT size;
8800 if (type)
8801 size = int_size_in_bytes (type);
8802 else
8803 size = GET_MODE_SIZE (mode);
8805 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8806 && (!named
8807 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8808 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8809 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8810 && size > 4
8811 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8812 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8813 return size;
8814 else
8815 return 0;
8818 static bool
8819 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8820 const_tree type, bool named)
8822 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8824 if (targetm.calls.must_pass_in_stack (mode, type))
8825 return true;
8827 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8828 wants to know about pass-by-reference semantics for incoming
8829 arguments. */
8830 if (! cum)
8831 return false;
8833 if (TARGET_SHCOMPACT)
8835 cum->byref = shcompact_byref (cum, mode, type, named);
8836 return cum->byref != 0;
8839 return false;
8842 static bool
8843 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8844 const_tree type, bool named ATTRIBUTE_UNUSED)
8846 /* ??? How can it possibly be correct to return true only on the
8847 caller side of the equation? Is there someplace else in the
8848 sh backend that's magically producing the copies? */
8849 return (get_cumulative_args (cum)->outgoing
8850 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8851 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8854 static int
8855 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8856 tree type, bool named ATTRIBUTE_UNUSED)
8858 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8859 int words = 0;
8861 if (!TARGET_SH5
8862 && PASS_IN_REG_P (*cum, mode, type)
8863 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8864 && (ROUND_REG (*cum, mode)
8865 + (mode != BLKmode
8866 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8867 : ROUND_ADVANCE (int_size_in_bytes (type)))
8868 > NPARM_REGS (mode)))
8869 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8871 else if (!TARGET_SHCOMPACT
8872 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8873 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8875 return words * UNITS_PER_WORD;
8879 /* Define where to put the arguments to a function.
8880 Value is zero to push the argument on the stack,
8881 or a hard register in which to store the argument.
8883 MODE is the argument's machine mode.
8884 TYPE is the data type of the argument (as a tree).
8885 This is null for libcalls where that information may
8886 not be available.
8887 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8888 the preceding args and about the function being called.
8889 NAMED is nonzero if this argument is a named parameter
8890 (otherwise it is an extra parameter matching an ellipsis).
8892 On SH the first args are normally in registers
8893 and the rest are pushed. Any arg that starts within the first
8894 NPARM_REGS words is at least partially passed in a register unless
8895 its data type forbids. */
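/* Illustrative example (not from the original source), assuming the usual
   SH register naming where FIRST_PARM_REG is r4 and FIRST_FP_PARM_REG is
   fr4: for a prototyped call f (int a, float b) under the default
   (non-Renesas) SH4 ABI, this hook reports A in r4 and B in fr4, except
   that on a little-endian target the SFmode register number is XORed
   with 1 (see below), giving fr5.  */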
8896 static rtx
8897 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8898 const_tree type, bool named)
8900 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8902 if (! TARGET_SH5 && mode == VOIDmode)
8903 return GEN_INT (ca->renesas_abi ? 1 : 0);
8905 if (! TARGET_SH5
8906 && PASS_IN_REG_P (*ca, mode, type)
8907 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8909 int regno;
8911 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8912 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8914 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8915 gen_rtx_REG (SFmode,
8916 BASE_ARG_REG (mode)
8917 + (ROUND_REG (*ca, mode) ^ 1)),
8918 const0_rtx);
8919 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8920 gen_rtx_REG (SFmode,
8921 BASE_ARG_REG (mode)
8922 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8923 GEN_INT (4));
8924 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8927 /* If the alignment of a DF value causes an SF register to be
8928 skipped, we will use that skipped register for the next SF
8929 value. */
8930 if ((TARGET_HITACHI || ca->renesas_abi)
8931 && ca->free_single_fp_reg
8932 && mode == SFmode)
8933 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8935 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8936 ^ (mode == SFmode && TARGET_SH4
8937 && TARGET_LITTLE_ENDIAN
8938 && ! TARGET_HITACHI && ! ca->renesas_abi);
8939 return gen_rtx_REG (mode, regno);
8943 if (TARGET_SH5)
8945 if (mode == VOIDmode && TARGET_SHCOMPACT)
8946 return GEN_INT (ca->call_cookie);
8948 /* The following test assumes unnamed arguments are promoted to
8949 DFmode. */
8950 if (mode == SFmode && ca->free_single_fp_reg)
8951 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8953 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8954 && (named || ! ca->prototype_p)
8955 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8957 if (! ca->prototype_p && TARGET_SHMEDIA)
8958 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8960 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8961 FIRST_FP_PARM_REG
8962 + ca->arg_count[(int) SH_ARG_FLOAT]);
8965 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8966 && (! TARGET_SHCOMPACT
8967 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8968 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8969 type, named))))
8971 return gen_rtx_REG (mode, (FIRST_PARM_REG
8972 + ca->arg_count[(int) SH_ARG_INT]));
8975 return NULL_RTX;
8978 return NULL_RTX;
8981 /* Update the data in CUM to advance over an argument
8982 of mode MODE and data type TYPE.
8983 (TYPE is null for libcalls where that information may not be
8984 available.) */
8985 static void
8986 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8987 const_tree type, bool named)
8989 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8991 if (ca->force_mem)
8992 ca->force_mem = 0;
8993 else if (TARGET_SH5)
8995 const_tree type2 = (ca->byref && type
8996 ? TREE_TYPE (type)
8997 : type);
8998 enum machine_mode mode2 = (ca->byref && type
8999 ? TYPE_MODE (type2)
9000 : mode);
9001 int dwords = ((ca->byref
9002 ? ca->byref
9003 : mode2 == BLKmode
9004 ? int_size_in_bytes (type2)
9005 : GET_MODE_SIZE (mode2)) + 7) / 8;
9006 int numregs = MIN (dwords, NPARM_REGS (SImode)
9007 - ca->arg_count[(int) SH_ARG_INT]);
9009 if (numregs)
9011 ca->arg_count[(int) SH_ARG_INT] += numregs;
9012 if (TARGET_SHCOMPACT
9013 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9015 ca->call_cookie
9016 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9017 - numregs, 1);
9018 /* N.B. We want this also for outgoing. */
9019 ca->stack_regs += numregs;
9021 else if (ca->byref)
9023 if (! ca->outgoing)
9024 ca->stack_regs += numregs;
9025 ca->byref_regs += numregs;
9026 ca->byref = 0;
9028 ca->call_cookie
9029 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9030 - numregs, 2);
9031 while (--numregs);
9032 ca->call_cookie
9033 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9034 - 1, 1);
9036 else if (dwords > numregs)
9038 int pushregs = numregs;
9040 if (TARGET_SHCOMPACT)
9041 ca->stack_regs += numregs;
9042 while (pushregs < NPARM_REGS (SImode) - 1
9043 && (CALL_COOKIE_INT_REG_GET
9044 (ca->call_cookie,
9045 NPARM_REGS (SImode) - pushregs)
9046 == 1))
9048 ca->call_cookie
9049 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9050 - pushregs, 1);
9051 pushregs++;
9053 if (numregs == NPARM_REGS (SImode))
9054 ca->call_cookie
9055 |= CALL_COOKIE_INT_REG (0, 1)
9056 | CALL_COOKIE_STACKSEQ (numregs - 1);
9057 else
9058 ca->call_cookie
9059 |= CALL_COOKIE_STACKSEQ (numregs);
9062 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9063 && (named || ! ca->prototype_p))
9065 if (mode2 == SFmode && ca->free_single_fp_reg)
9066 ca->free_single_fp_reg = 0;
9067 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9068 < NPARM_REGS (SFmode))
9070 int numfpregs
9071 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9072 NPARM_REGS (SFmode)
9073 - ca->arg_count[(int) SH_ARG_FLOAT]);
9075 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9077 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9079 if (ca->outgoing && numregs > 0)
9082 ca->call_cookie
9083 |= (CALL_COOKIE_INT_REG
9084 (ca->arg_count[(int) SH_ARG_INT]
9085 - numregs + ((numfpregs - 2) / 2),
9086 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9087 - numfpregs) / 2));
9089 while (numfpregs -= 2);
9091 else if (mode2 == SFmode && (named)
9092 && (ca->arg_count[(int) SH_ARG_FLOAT]
9093 < NPARM_REGS (SFmode)))
9094 ca->free_single_fp_reg
9095 = FIRST_FP_PARM_REG - numfpregs
9096 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9099 return;
9102 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9104 /* Note that we've used the skipped register. */
9105 if (mode == SFmode && ca->free_single_fp_reg)
9107 ca->free_single_fp_reg = 0;
9108 return;
9110 /* When we have a DF after an SF, there's an SF register that gets
9111 skipped in order to align the DF value. We note this skipped
9112 register, because the next SF value will use it, and not the
9113 SF that follows the DF. */
9114 if (mode == DFmode
9115 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
9117 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
9118 + BASE_ARG_REG (mode));
9122 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9123 || PASS_IN_REG_P (*ca, mode, type))
9124 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9125 = (ROUND_REG (*ca, mode)
9126 + (mode == BLKmode
9127 ? ROUND_ADVANCE (int_size_in_bytes (type))
9128 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
9131 /* The Renesas calling convention doesn't quite fit into this scheme since
9132 the address is passed like an invisible argument, but one that is always
9133 passed in memory. */
9134 static rtx
9135 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9137 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9138 return NULL_RTX;
9139 return gen_rtx_REG (Pmode, 2);
9142 /* Worker function for TARGET_FUNCTION_VALUE.
9144 For the SH, this is like LIBCALL_VALUE, except that we must change the
9145 mode like PROMOTE_MODE does.
9146 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9147 tested here has to be kept in sync with the one in
9148 explow.c:promote_mode. */
9149 static rtx
9150 sh_function_value (const_tree valtype,
9151 const_tree fn_decl_or_type,
9152 bool outgoing ATTRIBUTE_UNUSED)
9154 if (fn_decl_or_type
9155 && !DECL_P (fn_decl_or_type))
9156 fn_decl_or_type = NULL;
9158 return gen_rtx_REG (
9159 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9160 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9161 && (TREE_CODE (valtype) == INTEGER_TYPE
9162 || TREE_CODE (valtype) == ENUMERAL_TYPE
9163 || TREE_CODE (valtype) == BOOLEAN_TYPE
9164 || TREE_CODE (valtype) == REAL_TYPE
9165 || TREE_CODE (valtype) == OFFSET_TYPE))
9166 && sh_promote_prototypes (fn_decl_or_type)
9167 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9168 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9171 /* Worker function for TARGET_LIBCALL_VALUE. */
9172 static rtx
9173 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9175 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9178 /* Return true if N is a possible register number of function value. */
9179 static bool
9180 sh_function_value_regno_p (const unsigned int regno)
9182 return ((regno) == FIRST_RET_REG
9183 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9184 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9187 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9188 static bool
9189 sh_return_in_memory (const_tree type, const_tree fndecl)
9191 if (TARGET_SH5)
9193 if (TYPE_MODE (type) == BLKmode)
9194 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9195 else
9196 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9198 else
9200 return (TYPE_MODE (type) == BLKmode
9201 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9202 && TREE_CODE (type) == RECORD_TYPE));
9206 /* We actually emit the code in sh_expand_prologue. We used to use
9207 a static variable to flag that we need to emit this code, but that
9208 doesn't work when inlining, when functions are deferred and then emitted
9209 later. Fortunately, we already have two flags that are part of struct
9210 function that tell if a function uses varargs or stdarg. */
9211 static void
9212 sh_setup_incoming_varargs (cumulative_args_t ca,
9213 enum machine_mode mode,
9214 tree type,
9215 int *pretend_arg_size,
9216 int second_time ATTRIBUTE_UNUSED)
9218 gcc_assert (cfun->stdarg);
9219 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9221 int named_parm_regs, anon_parm_regs;
9223 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
9224 + (mode == BLKmode
9225 ? ROUND_ADVANCE (int_size_in_bytes (type))
9226 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
9227 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9228 if (anon_parm_regs > 0)
9229 *pretend_arg_size = anon_parm_regs * 4;
9233 static bool
9234 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9236 return TARGET_SH5;
9239 static bool
9240 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9242 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9244 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9248 /* Define the offset between two registers, one to be eliminated, and
9249 the other its replacement, at the start of a routine. */
9250 int
9251 initial_elimination_offset (int from, int to)
9253 int regs_saved;
9254 int regs_saved_rounding = 0;
9255 int total_saved_regs_space;
9256 int total_auto_space;
9257 int save_flags = target_flags;
9258 int copy_flags;
9259 HARD_REG_SET live_regs_mask;
9261 shmedia_space_reserved_for_target_registers = false;
9262 regs_saved = calc_live_regs (&live_regs_mask);
9263 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9265 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9267 shmedia_space_reserved_for_target_registers = true;
9268 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9271 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9272 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9273 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9275 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9276 copy_flags = target_flags;
9277 target_flags = save_flags;
9279 total_saved_regs_space = regs_saved + regs_saved_rounding;
9281 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9282 return total_saved_regs_space + total_auto_space
9283 + crtl->args.info.byref_regs * 8;
9285 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9286 return total_saved_regs_space + total_auto_space
9287 + crtl->args.info.byref_regs * 8;
9289 /* Initial gap between fp and sp is 0. */
9290 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9291 return 0;
9293 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9294 return rounded_frame_size (0);
9296 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9297 return rounded_frame_size (0);
9299 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9300 && (to == HARD_FRAME_POINTER_REGNUM
9301 || to == STACK_POINTER_REGNUM));
9302 if (TARGET_SH5)
9304 int n = total_saved_regs_space;
9305 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9306 save_schedule schedule;
9307 save_entry *entry;
9309 n += total_auto_space;
9311 /* If it wasn't saved, there's not much we can do. */
9312 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9313 return n;
9315 target_flags = copy_flags;
9317 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9318 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9319 if (entry->reg == pr_reg)
9321 target_flags = save_flags;
9322 return entry->offset;
9324 gcc_unreachable ();
9326 else
9327 return total_auto_space;
9330 /* Parse the -mfixed-range= option string. */
9331 void
9332 sh_fix_range (const char *const_str)
9334 int i, first, last;
9335 char *str, *dash, *comma;
9337 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9338 REG2 are either register names or register numbers. The effect
9339 of this option is to mark the registers in the range from REG1 to
9340 REG2 as ``fixed'' so they won't be used by the compiler. */
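 /* An illustrative sketch (register names chosen arbitrarily): the option
 string "r8-r10,r12-r12" marks r8, r9, r10 and r12 as fixed registers.
 Each comma-separated element must contain a dash, so a single register
 is written as a one-element range such as "r12-r12". */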
9342 i = strlen (const_str);
9343 str = (char *) alloca (i + 1);
9344 memcpy (str, const_str, i + 1);
9346 while (1)
9348 dash = strchr (str, '-');
9349 if (!dash)
9351 warning (0, "value of -mfixed-range must have form REG1-REG2");
9352 return;
9354 *dash = '\0';
9355 comma = strchr (dash + 1, ',');
9356 if (comma)
9357 *comma = '\0';
9359 first = decode_reg_name (str);
9360 if (first < 0)
9362 warning (0, "unknown register name: %s", str);
9363 return;
9366 last = decode_reg_name (dash + 1);
9367 if (last < 0)
9369 warning (0, "unknown register name: %s", dash + 1);
9370 return;
9373 *dash = '-';
9375 if (first > last)
9377 warning (0, "%s-%s is an empty range", str, dash + 1);
9378 return;
9381 for (i = first; i <= last; ++i)
9382 fixed_regs[i] = call_used_regs[i] = 1;
9384 if (!comma)
9385 break;
9387 *comma = ',';
9388 str = comma + 1;
9392 /* Insert any deferred function attributes from earlier pragmas. */
9393 static void
9394 sh_insert_attributes (tree node, tree *attributes)
9396 tree attrs;
9398 if (TREE_CODE (node) != FUNCTION_DECL)
9399 return;
9401 /* We are only interested in function declarations. */
9402 if (!DECL_P (node))
9403 return;
9405 /* Append the attributes to the deferred attributes. */
9406 *sh_deferred_function_attributes_tail = *attributes;
9407 attrs = sh_deferred_function_attributes;
9408 if (!attrs)
9409 return;
9411 /* Some attributes imply or require the interrupt attribute. */
9412 if (!lookup_attribute ("interrupt_handler", attrs)
9413 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9415 /* If we have a trapa_handler, but no interrupt_handler attribute,
9416 insert an interrupt_handler attribute. */
9417 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9418 /* We can't use sh_pr_interrupt here because that's not in the
9419 java frontend. */
9420 attrs
9421 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9422 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9423 if the interrupt attribute is missing, we ignore the attribute
9424 and warn. */
9425 else if (lookup_attribute ("sp_switch", attrs)
9426 || lookup_attribute ("trap_exit", attrs)
9427 || lookup_attribute ("nosave_low_regs", attrs)
9428 || lookup_attribute ("resbank", attrs))
9430 tree *tail;
9432 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9434 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9435 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9436 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9437 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9438 warning (OPT_Wattributes,
9439 "%qE attribute only applies to interrupt functions",
9440 TREE_PURPOSE (attrs));
9441 else
9443 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9444 NULL_TREE);
9445 tail = &TREE_CHAIN (*tail);
9448 attrs = *attributes;
9452 /* Install the processed list. */
9453 *attributes = attrs;
9455 /* Clear deferred attributes. */
9456 sh_deferred_function_attributes = NULL_TREE;
9457 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9459 return;
9462 /*------------------------------------------------------------------------------
9463 Target specific attributes
9464 Supported attributes are:
9466 * interrupt_handler
9467 Specifies this function is an interrupt handler.
9469 * trapa_handler
9470 Like interrupt_handler, but don't save all registers.
9472 * sp_switch
9473 Specifies an alternate stack for an interrupt handler to run on.
9475 * trap_exit
9476 Use a trapa to exit an interrupt function instead of rte.
9478 * nosave_low_regs
9479 Don't save r0..r7 in an interrupt handler function.
9480 This is useful on SH3* and SH4*, which have a separate set of low
9481 regs for user and privileged modes.
9482 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9483 those that run with interrupts disabled and thus can't be
9484 interrupted themselves).
9486 * renesas
9487 Use Renesas calling/layout conventions (functions and structures).
9489 * resbank
9490 In case of an interrupt handler function, use a register bank to
9491 save registers R0-R14, MACH, MACL, GBR and PR.
9492 This is available only on SH2A targets.
9494 * function_vector
9495 Declares a function to be called using the TBR relative addressing
9496 mode. Takes an argument that specifies the slot number in the table
9497 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
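
 A hedged usage sketch (the function names, the stack variable name and the
 vector slot number below are made up purely for illustration):

 void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
 trap_exit (11)))
 my_isr (void);

 void __attribute__ ((function_vector (18)))
 my_tbr_func (void);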
9500 /* Handle a 'resbank' attribute. */
9501 static tree
9502 sh_handle_resbank_handler_attribute (tree * node, tree name,
9503 tree args ATTRIBUTE_UNUSED,
9504 int flags ATTRIBUTE_UNUSED,
9505 bool * no_add_attrs)
9507 if (!TARGET_SH2A)
9509 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9510 name);
9511 *no_add_attrs = true;
9513 if (TREE_CODE (*node) != FUNCTION_DECL)
9515 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9516 name);
9517 *no_add_attrs = true;
9520 return NULL_TREE;
9523 /* Handle an "interrupt_handler" attribute; arguments as in
9524 struct attribute_spec.handler. */
9525 static tree
9526 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9527 tree args ATTRIBUTE_UNUSED,
9528 int flags ATTRIBUTE_UNUSED,
9529 bool *no_add_attrs)
9531 if (TREE_CODE (*node) != FUNCTION_DECL)
9533 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9534 name);
9535 *no_add_attrs = true;
9537 else if (TARGET_SHCOMPACT)
9539 error ("attribute interrupt_handler is not compatible with -m5-compact");
9540 *no_add_attrs = true;
9543 return NULL_TREE;
9546 /* Handle a 'function_vector' attribute; arguments as in
9547 struct attribute_spec.handler. */
9548 static tree
9549 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9550 tree args ATTRIBUTE_UNUSED,
9551 int flags ATTRIBUTE_UNUSED,
9552 bool * no_add_attrs)
9554 if (!TARGET_SH2A)
9556 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9557 name);
9558 *no_add_attrs = true;
9560 else if (TREE_CODE (*node) != FUNCTION_DECL)
9562 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9563 name);
9564 *no_add_attrs = true;
9566 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9568 /* The argument must be a constant integer. */
9569 warning (OPT_Wattributes,
9570 "%qE attribute argument not an integer constant",
9571 name);
9572 *no_add_attrs = true;
9574 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9576 /* The argument value must be between 0 and 255. */
9577 warning (OPT_Wattributes,
9578 "%qE attribute argument should be between 0 to 255",
9579 name);
9580 *no_add_attrs = true;
9582 return NULL_TREE;
9585 /* Returns true if X is a SYMBOL_REF for a function that has been
9586 assigned the attribute 'function_vector'. */
9587 bool
9588 sh2a_is_function_vector_call (rtx x)
9590 if (GET_CODE (x) == SYMBOL_REF
9591 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9593 tree tr = SYMBOL_REF_DECL (x);
9595 if (sh2a_function_vector_p (tr))
9596 return true;
9599 return false;
9602 /* Returns the function vector number, if the attribute
9603 'function_vector' is assigned, otherwise returns zero. */
9605 sh2a_get_function_vector_number (rtx x)
9607 int num;
9608 tree list, t;
9610 if ((GET_CODE (x) == SYMBOL_REF)
9611 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9613 t = SYMBOL_REF_DECL (x);
9615 if (TREE_CODE (t) != FUNCTION_DECL)
9616 return 0;
9618 list = SH_ATTRIBUTES (t);
9619 while (list)
9621 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9623 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9624 return num;
9627 list = TREE_CHAIN (list);
9630 return 0;
9632 else
9633 return 0;
9636 /* Handle an "sp_switch" attribute; arguments as in
9637 struct attribute_spec.handler. */
9638 static tree
9639 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9640 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9642 if (TREE_CODE (*node) != FUNCTION_DECL)
9644 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9645 name);
9646 *no_add_attrs = true;
9648 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9650 /* The argument must be a constant string. */
9651 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9652 name);
9653 *no_add_attrs = true;
9656 return NULL_TREE;
9659 /* Handle a "trap_exit" attribute; arguments as in
9660 struct attribute_spec.handler. */
9661 static tree
9662 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9663 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9665 if (TREE_CODE (*node) != FUNCTION_DECL)
9667 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9668 name);
9669 *no_add_attrs = true;
9671 /* The argument specifies a trap number to be used in a trapa instruction
9672 at function exit (instead of an rte instruction). */
9673 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9675 /* The argument must be a constant integer. */
9676 warning (OPT_Wattributes, "%qE attribute argument not an "
9677 "integer constant", name);
9678 *no_add_attrs = true;
9681 return NULL_TREE;
9684 static tree
9685 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9686 tree name ATTRIBUTE_UNUSED,
9687 tree args ATTRIBUTE_UNUSED,
9688 int flags ATTRIBUTE_UNUSED,
9689 bool *no_add_attrs ATTRIBUTE_UNUSED)
9691 return NULL_TREE;
9694 /* True if __attribute__((renesas)) or -mrenesas. */
9695 bool
9696 sh_attr_renesas_p (const_tree td)
9698 if (TARGET_HITACHI)
9699 return true;
9700 if (td == NULL_TREE)
9701 return false;
9702 if (DECL_P (td))
9703 td = TREE_TYPE (td);
9704 if (td == error_mark_node)
9705 return false;
9706 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9707 != NULL_TREE);
9710 /* True if __attribute__((renesas)) or -mrenesas, for the current
9711 function. */
9712 bool
9713 sh_cfun_attr_renesas_p (void)
9715 return sh_attr_renesas_p (current_function_decl);
9718 /* Returns true if the current function has the "interrupt_handler"
9719 attribute set. */
9720 bool
9721 sh_cfun_interrupt_handler_p (void)
9723 return (lookup_attribute ("interrupt_handler",
9724 DECL_ATTRIBUTES (current_function_decl))
9725 != NULL_TREE);
9728 /* Returns true if FUNC has been assigned the attribute
9729 "function_vector". */
9730 bool
9731 sh2a_function_vector_p (tree func)
9733 tree list;
9734 if (TREE_CODE (func) != FUNCTION_DECL)
9735 return false;
9737 list = SH_ATTRIBUTES (func);
9738 while (list)
9740 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9741 return true;
9743 list = TREE_CHAIN (list);
9745 return false;
9748 /* Returns true if the current function has the "resbank" attribute set. */
9749 bool
9750 sh_cfun_resbank_handler_p (void)
9752 return ((lookup_attribute ("resbank",
9753 DECL_ATTRIBUTES (current_function_decl))
9754 != NULL_TREE)
9755 && (lookup_attribute ("interrupt_handler",
9756 DECL_ATTRIBUTES (current_function_decl))
9757 != NULL_TREE) && TARGET_SH2A);
9760 /* Returns true if the current function has a "trap_exit" attribute set. */
9761 bool
9762 sh_cfun_trap_exit_p (void)
9764 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9765 != NULL_TREE;
9768 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9769 static const char *
9770 sh_check_pch_target_flags (int old_flags)
9772 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9773 | MASK_SH_E | MASK_HARD_SH4
9774 | MASK_FPU_SINGLE | MASK_SH4))
9775 return _("created and used with different architectures / ABIs");
9776 if ((old_flags ^ target_flags) & MASK_HITACHI)
9777 return _("created and used with different ABIs");
9778 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9779 return _("created and used with different endianness");
9780 return NULL;
9783 /* Predicates used by the templates. */
9785 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9786 Used only in general_movsrc_operand. */
9787 bool
9788 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9790 switch (REGNO (op))
9792 case PR_REG:
9793 case MACL_REG:
9794 case MACH_REG:
9795 return true;
9797 return false;
9800 /* Returns true if OP is a floating point value with value 0.0. */
9801 bool
9802 fp_zero_operand (rtx op)
9804 REAL_VALUE_TYPE r;
9806 if (GET_MODE (op) != SFmode)
9807 return false;
9809 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9810 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9813 /* Returns true if OP is a floating point value with value 1.0. */
9814 bool
9815 fp_one_operand (rtx op)
9817 REAL_VALUE_TYPE r;
9819 if (GET_MODE (op) != SFmode)
9820 return false;
9822 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9823 return REAL_VALUES_EQUAL (r, dconst1);
9826 /* In general mode switching is used. If we are
9827 compiling without -mfmovd, movsf_ie isn't taken into account for
9828 mode switching. We could check in machine_dependent_reorg for
9829 cases where we know we are in single precision mode, but there is
9830 no interface to find that out during reload, so we must avoid
9831 choosing an fldi alternative during reload and thus failing to
9832 allocate a scratch register for the constant loading. */
9833 bool
9834 fldi_ok (void)
9836 return true;
9839 /* Return the TLS type for TLS symbols. */
9840 enum tls_model
9841 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9843 if (GET_CODE (op) != SYMBOL_REF)
9844 return TLS_MODEL_NONE;
9845 return SYMBOL_REF_TLS_MODEL (op);
9848 /* Return the destination address of a branch. */
9849 static int
9850 branch_dest (rtx branch)
9852 rtx dest = SET_SRC (PATTERN (branch));
9853 int dest_uid;
9855 if (GET_CODE (dest) == IF_THEN_ELSE)
9856 dest = XEXP (dest, 1);
9857 dest = XEXP (dest, 0);
9858 dest_uid = INSN_UID (dest);
9859 return INSN_ADDRESSES (dest_uid);
9862 /* Return nonzero if REG is not used after INSN.
9863 We assume REG is a reload reg, and therefore does
9864 not live past labels. It may live past calls or jumps though. */
9865 bool
9866 reg_unused_after (rtx reg, rtx insn)
9868 enum rtx_code code;
9869 rtx set;
9871 /* If the reg is set by this instruction, then it is safe for our
9872 case. Disregard the case where this is a store to memory, since
9873 we are checking a register used in the store address. */
9874 set = single_set (insn);
9875 if (set && !MEM_P (SET_DEST (set))
9876 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9877 return true;
9879 while ((insn = NEXT_INSN (insn)))
9881 rtx set;
9882 if (!INSN_P (insn))
9883 continue;
9885 code = GET_CODE (insn);
9887 #if 0
9888 /* If this is a label that existed before reload, then the register
9889 is dead here. However, if this is a label added by reorg, then
9890 the register may still be live here. We can't tell the difference,
9891 so we just ignore labels completely. */
9892 if (code == CODE_LABEL)
9893 return 1;
9894 /* else */
9895 #endif
9897 if (code == JUMP_INSN)
9898 return false;
9900 /* If this is a sequence, we must handle them all at once.
9901 We could have for instance a call that sets the target register,
9902 and an insn in a delay slot that uses the register. In this case,
9903 we must return 0. */
9904 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9906 int i;
9907 int retval = 0;
9909 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9911 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9912 rtx set = single_set (this_insn);
9914 if (CALL_P (this_insn))
9915 code = CALL_INSN;
9916 else if (JUMP_P (this_insn))
9918 if (INSN_ANNULLED_BRANCH_P (this_insn))
9919 return false;
9920 code = JUMP_INSN;
9923 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9924 return false;
9925 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9927 if (!MEM_P (SET_DEST (set)))
9928 retval = true;
9929 else
9930 return false;
9932 if (set == NULL_RTX
9933 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9934 return false;
9936 if (retval == 1)
9937 return true;
9938 else if (code == JUMP_INSN)
9939 return false;
9942 set = single_set (insn);
9943 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9944 return false;
9945 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9946 return !MEM_P (SET_DEST (set));
9947 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9948 return false;
9950 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9951 return true;
9953 return true;
9956 #include "ggc.h"
9958 static GTY(()) rtx t_reg_rtx;
9960 get_t_reg_rtx (void)
9962 if (! t_reg_rtx)
9963 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
9964 return t_reg_rtx;
9967 static GTY(()) rtx fpscr_rtx;
9969 get_fpscr_rtx (void)
9971 if (! fpscr_rtx)
9973 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9974 REG_USERVAR_P (fpscr_rtx) = 1;
9975 mark_user_reg (fpscr_rtx);
9977 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9978 mark_user_reg (fpscr_rtx);
9979 return fpscr_rtx;
9982 static GTY(()) tree fpscr_values;
9984 static void
9985 emit_fpu_switch (rtx scratch, int index)
9987 rtx dst, src;
9989 if (fpscr_values == NULL)
9991 tree t;
9993 t = build_index_type (integer_one_node);
9994 t = build_array_type (integer_type_node, t);
9995 t = build_decl (BUILTINS_LOCATION,
9996 VAR_DECL, get_identifier ("__fpscr_values"), t);
9997 DECL_ARTIFICIAL (t) = 1;
9998 DECL_IGNORED_P (t) = 1;
9999 DECL_EXTERNAL (t) = 1;
10000 TREE_STATIC (t) = 1;
10001 TREE_PUBLIC (t) = 1;
10002 TREE_USED (t) = 1;
10004 fpscr_values = t;
10007 src = DECL_RTL (fpscr_values);
10008 if (!can_create_pseudo_p ())
10010 emit_move_insn (scratch, XEXP (src, 0));
10011 if (index != 0)
10012 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10013 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
10015 else
10016 src = adjust_address (src, PSImode, index * 4);
10018 dst = get_fpscr_rtx ();
10019 emit_move_insn (dst, src);
10022 void
10023 emit_sf_insn (rtx pat)
10025 emit_insn (pat);
10028 void
10029 emit_df_insn (rtx pat)
10031 emit_insn (pat);
10034 void
10035 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10037 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10040 void
10041 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10043 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
10044 get_fpscr_rtx ()));
10047 void
10048 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10050 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10053 void
10054 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10056 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
10057 get_fpscr_rtx ()));
10060 static rtx get_free_reg (HARD_REG_SET);
10062 /* This function returns a register to use to load the address to load
10063 the fpscr from. Currently it always returns r1 or r7, but when we are
10064 able to use pseudo registers after combine, or have a better mechanism
10065 for choosing a register, it should be done here. */
10066 /* REGS_LIVE is the liveness information for the point for which we
10067 need this allocation. In some bare-bones exit blocks, r1 is live at the
10068 start. We can even have all of r0..r3 being live:
10069 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10070 The INSN before which new insns are placed will clobber the register
10071 we return. If a basic block consists only of setting the return value
10072 register to a pseudo and using that register, the return value is not
10073 live before or after this block, yet we'll insert our insns right in
10074 the middle. */
10075 static rtx
10076 get_free_reg (HARD_REG_SET regs_live)
10078 if (! TEST_HARD_REG_BIT (regs_live, 1))
10079 return gen_rtx_REG (Pmode, 1);
10081 /* Hard reg 1 is live; since this is a small register classes target,
10082 there shouldn't be anything but a jump before the function end. */
10083 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10084 return gen_rtx_REG (Pmode, 7);
10087 /* This function will set the fpscr from memory.
10088 MODE is the mode we are setting it to. */
10089 void
10090 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10092 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10093 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10094 rtx addr_reg;
10096 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10097 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10100 /* Is the given character a logical line separator for the assembler? */
10101 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10102 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10103 #endif
10105 static bool
10106 sequence_insn_p (rtx insn)
10108 rtx prev, next;
10110 prev = PREV_INSN (insn);
10111 if (prev == NULL)
10112 return false;
10114 next = NEXT_INSN (prev);
10115 if (next == NULL)
10116 return false;
10118 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10122 sh_insn_length_adjustment (rtx insn)
10124 /* Instructions with unfilled delay slots take up an extra two bytes for
10125 the nop in the delay slot. */
10126 if (((NONJUMP_INSN_P (insn)
10127 && GET_CODE (PATTERN (insn)) != USE
10128 && GET_CODE (PATTERN (insn)) != CLOBBER)
10129 || CALL_P (insn)
10130 || (JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)))
10131 && ! sequence_insn_p (insn)
10132 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10133 return 2;
10135 /* SH2e has a bug that prevents the use of annulled branches, so if
10136 the delay slot is not filled, we'll have to put a NOP in it. */
10137 if (sh_cpu_attr == CPU_SH2E
10138 && JUMP_P (insn) && !JUMP_TABLE_DATA_P (insn)
10139 && get_attr_type (insn) == TYPE_CBRANCH
10140 && ! sequence_insn_p (insn))
10141 return 2;
10143 /* sh-dsp parallel processing insns take four bytes instead of two. */
10145 if (NONJUMP_INSN_P (insn))
10147 int sum = 0;
10148 rtx body = PATTERN (insn);
10149 const char *templ;
10150 char c;
10151 bool maybe_label = true;
10153 if (GET_CODE (body) == ASM_INPUT)
10154 templ = XSTR (body, 0);
10155 else if (asm_noperands (body) >= 0)
10156 templ
10157 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10158 else
10159 return 0;
10162 int ppi_adjust = 0;
10165 c = *templ++;
10166 while (c == ' ' || c == '\t');
10167 /* all sh-dsp parallel-processing insns start with p.
10168 The only non-ppi sh insn starting with p is pref.
10169 The only ppi starting with pr is prnd. */
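 /* Illustrative examples only: an asm template beginning with "padd" or
 "psha" gets a 2 byte adjustment here, "pref @r1" gets none, and a
 "repeat" pseudo-insn gets a 4 byte adjustment below. */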
10170 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10171 ppi_adjust = 2;
10172 /* The repeat pseudo-insn expands to three insns, a total of
10173 six bytes in size. */
10174 else if ((c == 'r' || c == 'R')
10175 && ! strncasecmp ("epeat", templ, 5))
10176 ppi_adjust = 4;
10177 while (c && c != '\n'
10178 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10180 /* If this is a label, it is obviously not a ppi insn. */
10181 if (c == ':' && maybe_label)
10183 ppi_adjust = 0;
10184 break;
10186 else if (c == '\'' || c == '"')
10187 maybe_label = false;
10188 c = *templ++;
10190 sum += ppi_adjust;
10191 maybe_label = c != ':';
10193 while (c);
10194 return sum;
10196 return 0;
10199 /* Return TRUE for a valid displacement for the REG+disp addressing
10200 with MODE. */
10201 bool
10202 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10203 bool allow_zero)
10205 if (! CONST_INT_P (op))
10206 return false;
10208 if (TARGET_SHMEDIA)
10210 int size;
10212 /* Check if this is the address of an unaligned load / store. */
10213 if (mode == VOIDmode)
10214 return satisfies_constraint_I06 (op);
10216 size = GET_MODE_SIZE (mode);
10217 return (!(INTVAL (op) & (size - 1))
10218 && INTVAL (op) >= -512 * size
10219 && INTVAL (op) < 512 * size);
10221 else
10223 const HOST_WIDE_INT offset = INTVAL (op);
10224 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10225 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10227 /* If the mode does not support any displacement always return false.
10228 Even though an index of '0' is actually always valid, it will cause
10229 troubles when e.g. a DFmode move is split into two SFmode moves,
10230 where one SFmode move will have index '0' and the other move will
10231 have index '4'. */
10232 if (!allow_zero && max_disp < 1)
10233 return false;
10235 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10239 /* Recognize an RTL expression that is a valid memory address for
10240 an instruction.
10241 The MODE argument is the machine mode for the MEM expression
10242 that wants to use this address.
10243 Allow REG
10244 REG+disp
10245 REG+r0
10246 REG++
10247 --REG
10249 GBR+disp */
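 /* For illustration only, these forms correspond to SH assembly operands
 such as @r4, @(8,r4), @(r0,r4), @r4+, @-r4 and @(12,gbr) respectively;
 the register numbers and displacements here are arbitrary. */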
10250 static bool
10251 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10253 if (REG_P (x) && REGNO (x) == GBR_REG)
10254 return true;
10256 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10257 return true;
10258 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10259 && ! TARGET_SHMEDIA
10260 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10261 return true;
10262 else if (GET_CODE (x) == PLUS
10263 && (mode != PSImode || reload_completed))
10265 rtx xop0 = XEXP (x, 0);
10266 rtx xop1 = XEXP (x, 1);
10268 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10269 return gbr_displacement (xop1, mode);
10271 if (GET_MODE_SIZE (mode) <= 8
10272 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10273 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10274 return true;
10276 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10277 || ((xop0 == stack_pointer_rtx
10278 || xop0 == hard_frame_pointer_rtx)
10279 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10280 || ((xop1 == stack_pointer_rtx
10281 || xop1 == hard_frame_pointer_rtx)
10282 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10283 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10284 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10285 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10286 && TARGET_FMOVD && mode == DFmode)))
10288 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10289 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10290 return true;
10291 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10292 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10293 return true;
10297 return false;
10300 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10301 isn't protected by a PIC unspec. */
10302 bool
10303 nonpic_symbol_mentioned_p (rtx x)
10305 const char *fmt;
10306 int i;
10308 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10309 || GET_CODE (x) == PC)
10310 return true;
10312 /* We don't want to look into the possible MEM location of a
10313 CONST_DOUBLE, since we're not going to use it, in general. */
10314 if (GET_CODE (x) == CONST_DOUBLE)
10315 return false;
10317 if (GET_CODE (x) == UNSPEC
10318 && (XINT (x, 1) == UNSPEC_PIC
10319 || XINT (x, 1) == UNSPEC_GOT
10320 || XINT (x, 1) == UNSPEC_GOTOFF
10321 || XINT (x, 1) == UNSPEC_GOTPLT
10322 || XINT (x, 1) == UNSPEC_GOTTPOFF
10323 || XINT (x, 1) == UNSPEC_DTPOFF
10324 || XINT (x, 1) == UNSPEC_TPOFF
10325 || XINT (x, 1) == UNSPEC_PLT
10326 || XINT (x, 1) == UNSPEC_SYMOFF
10327 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10328 return false;
10330 fmt = GET_RTX_FORMAT (GET_CODE (x));
10331 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10333 if (fmt[i] == 'E')
10335 int j;
10336 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10337 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10338 return true;
10340 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10341 return true;
10344 return false;
10347 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10348 @GOTOFF in `reg'. */
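 /* Roughly: a label or a symbol known to be local is rebased off the GOT
 pointer via @GOTOFF, any other SYMBOL_REF is loaded through its GOT slot,
 and TLS symbols are returned unchanged (they are handled elsewhere). */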
10350 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10351 rtx reg)
10353 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10354 return orig;
10356 if (GET_CODE (orig) == LABEL_REF
10357 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10359 if (reg == NULL_RTX)
10360 reg = gen_reg_rtx (Pmode);
10362 emit_insn (gen_symGOTOFF2reg (reg, orig));
10363 return reg;
10365 else if (GET_CODE (orig) == SYMBOL_REF)
10367 if (reg == NULL_RTX)
10368 reg = gen_reg_rtx (Pmode);
10370 emit_insn (gen_symGOT2reg (reg, orig));
10371 return reg;
10373 return orig;
10376 /* Given a (logical) mode size and an offset in bytes, try to find the
10377 appropriate displacement value for a mov insn. On SH the displacements
10378 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10379 15 bytes in QImode. To compensate for this we create a new base address by
10380 adding an adjustment value to it.
10382 If the originally requested offset is greater than 127 we prefer using
10383 values 124..127 over 128..131 to increase opportunities to use the
10384 add #imm, Rn insn.
10386 In some cases it is possible that a requested offset might seem unaligned
10387 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10388 This is compensated by adjusting the base address so that the effective
10389 address of the displacement move insn will be aligned.
10391 This is not the best possible way of rebasing the base address, as it
10392 does not look at other displacement addressings already present around it.
10393 In some cases this can create more base address adjustments than would
10394 actually be necessary. */
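 /* A sketch of the intended effect, assuming non-SH2A SImode (max. disp 60)
 and placeholder registers rX/rY: a requested offset of 68 is split into a
 base adjustment of 64 and a residual displacement of 4, so the access ends
 up roughly as
 add #64,rX
 mov.l @(4,rX),rY
 keeping the displacement within the 0..60 range. */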
10395 struct disp_adjust
10397 rtx offset_adjust;
10398 rtx mov_disp;
10401 static struct disp_adjust
10402 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10404 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10406 /* Do not try to use SH2A's large displacements here, because this would
10407 effectively disable the small displacement insns. */
10408 const int mode_sz = GET_MODE_SIZE (mode);
10409 const int mov_insn_sz = mov_insn_size (mode, false);
10410 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10411 const int max_disp_next = max_disp + mov_insn_sz;
10412 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10413 HOST_WIDE_INT offset_adjust;
10415 /* In some cases this actually does happen and we must check for it. */
10416 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10417 return res;
10419 /* Keeps the previous behavior for QImode displacement addressing.
10420 This just decides how the offset is re-based. Removing this special
10421 case will result in slightly bigger code on average, but it's not that
10422 bad actually. */
10423 if (mov_insn_sz == 1)
10424 align_modifier = 0;
10426 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10428 if (mode_sz + offset - offset_adjust <= max_disp_next)
10430 res.offset_adjust = GEN_INT (offset_adjust);
10431 res.mov_disp = GEN_INT (offset - offset_adjust);
10434 return res;
10437 /* Try to modify an illegitimate address and make it legitimate.
10438 If we find one, return the new, valid address.
10439 Otherwise, return the original address. */
10440 static rtx
10441 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10443 if (flag_pic)
10444 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10446 if (TARGET_SHMEDIA)
10447 return x;
10449 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10450 || (TARGET_SH2E && mode == SFmode))
10451 return x;
10453 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10454 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10456 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10457 INTVAL (XEXP (x, 1)));
10459 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10461 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10462 adj.offset_adjust, NULL_RTX, 0,
10463 OPTAB_LIB_WIDEN);
10464 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10468 return x;
10471 /* Attempt to replace *p, which is an address that needs reloading, with
10472 a valid memory address for an operand of mode MODE.
10473 Like for sh_legitimize_address, for the SH we try to get a normal form
10474 of the address. That will allow inheritance of the address reloads. */
10475 bool
10476 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10477 int itype)
10479 enum reload_type type = (enum reload_type) itype;
10480 const int mode_sz = GET_MODE_SIZE (mode);
10482 if (TARGET_SHMEDIA)
10483 return false;
10485 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10486 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10487 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10488 && (ALLOW_INDEXED_ADDRESS
10489 || XEXP (*p, 0) == stack_pointer_rtx
10490 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10492 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10493 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10495 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10497 push_reload (*p, NULL_RTX, p, NULL,
10498 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10499 return true;
10502 if (TARGET_SH2E && mode == SFmode)
10504 *p = copy_rtx (*p);
10505 push_reload (*p, NULL_RTX, p, NULL,
10506 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10507 return true;
10510 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10511 moves because then reload has a problem figuring the constraint
10512 that the move insn target/source reg must be R0.
10513 Or maybe some handling is wrong in sh_secondary_reload for this
10514 to work properly? */
10515 if ((mode_sz == 4 || mode_sz == 8)
10516 && ! (TARGET_SH4 && mode == DFmode)
10517 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10519 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10520 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10521 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10522 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10523 return true;
10527 /* We must re-recognize what we created before. */
10528 if (GET_CODE (*p) == PLUS
10529 && (mode_sz == 4 || mode_sz == 8)
10530 && GET_CODE (XEXP (*p, 0)) == PLUS
10531 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10532 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10533 && CONST_INT_P (XEXP (*p, 1))
10534 && ! (TARGET_SH2E && mode == SFmode))
10536 /* Because this address is so complex, we know it must have
10537 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10538 it is already unshared, and needs no further unsharing. */
10539 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10540 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10541 return true;
10544 return false;
10547 /* In the name of slightly smaller debug output, and to cater to
10548 general assembler lossage, recognize various UNSPEC sequences
10549 and turn them back into a direct symbol reference. */
10550 static rtx
10551 sh_delegitimize_address (rtx orig_x)
10553 rtx x, y;
10555 orig_x = delegitimize_mem_from_attrs (orig_x);
10557 x = orig_x;
10558 if (MEM_P (x))
10559 x = XEXP (x, 0);
10560 if (GET_CODE (x) == CONST)
10562 y = XEXP (x, 0);
10563 if (GET_CODE (y) == UNSPEC)
10565 if (XINT (y, 1) == UNSPEC_GOT
10566 || XINT (y, 1) == UNSPEC_GOTOFF
10567 || XINT (y, 1) == UNSPEC_SYMOFF)
10568 return XVECEXP (y, 0, 0);
10569 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10571 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10573 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10575 if (GET_CODE (symplt) == UNSPEC
10576 && XINT (symplt, 1) == UNSPEC_PLT)
10577 return XVECEXP (symplt, 0, 0);
10580 else if (TARGET_SHMEDIA
10581 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10582 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10584 rtx offset = XVECEXP (y, 0, 1);
10586 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10587 if (MEM_P (orig_x))
10588 x = replace_equiv_address_nv (orig_x, x);
10589 return x;
10594 return orig_x;
10597 /* Mark the use of a constant in the literal table. If the constant
10598 has multiple labels, make it unique. */
10599 static rtx
10600 mark_constant_pool_use (rtx x)
10602 rtx insn, lab, pattern;
10604 if (x == NULL_RTX)
10605 return x;
10607 switch (GET_CODE (x))
10609 case LABEL_REF:
10610 x = XEXP (x, 0);
10611 case CODE_LABEL:
10612 break;
10613 default:
10614 return x;
10617 /* Get the first label in the list of labels for the same constant
10618 and delete the other labels in the list. */
10619 lab = x;
10620 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10622 if (!LABEL_P (insn)
10623 || LABEL_REFS (insn) != NEXT_INSN (insn))
10624 break;
10625 lab = insn;
10628 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10629 INSN_DELETED_P (insn) = 1;
10631 /* Mark constants in a window. */
10632 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10634 if (!NONJUMP_INSN_P (insn))
10635 continue;
10637 pattern = PATTERN (insn);
10638 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10639 continue;
10641 switch (XINT (pattern, 1))
10643 case UNSPECV_CONST2:
10644 case UNSPECV_CONST4:
10645 case UNSPECV_CONST8:
10646 XVECEXP (pattern, 0, 1) = const1_rtx;
10647 break;
10648 case UNSPECV_WINDOW_END:
10649 if (XVECEXP (pattern, 0, 0) == x)
10650 return lab;
10651 break;
10652 case UNSPECV_CONST_END:
10653 return lab;
10654 default:
10655 break;
10659 return lab;
10662 /* Return true if it's possible to redirect BRANCH1 to the destination
10663 of an unconditional jump BRANCH2. We only want to do this if the
10664 resulting branch will have a short displacement. */
10665 bool
10666 sh_can_redirect_branch (rtx branch1, rtx branch2)
10668 if (flag_expensive_optimizations && simplejump_p (branch2))
10670 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10671 rtx insn;
10672 int distance;
10674 for (distance = 0, insn = NEXT_INSN (branch1);
10675 insn && distance < 256;
10676 insn = PREV_INSN (insn))
10678 if (insn == dest)
10679 return true;
10680 else
10681 distance += get_attr_length (insn);
10683 for (distance = 0, insn = NEXT_INSN (branch1);
10684 insn && distance < 256;
10685 insn = NEXT_INSN (insn))
10687 if (insn == dest)
10688 return true;
10689 else
10690 distance += get_attr_length (insn);
10693 return false;
10696 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10697 bool
10698 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10699 unsigned int new_reg)
10701 /* Interrupt functions can only use registers that have already been
10702 saved by the prologue, even if they would normally be
10703 call-clobbered. */
10704 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10705 return false;
10707 return true;
10710 /* Function to update the integer COST
10711 based on the relationship between INSN that is dependent on
10712 DEP_INSN through the dependence LINK. The default is to make no
10713 adjustment to COST. This can be used for example to specify to
10714 the scheduler that an output- or anti-dependence does not incur
10715 the same cost as a data-dependence. The return value should be
10716 the new value for COST. */
10717 static int
10718 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10720 rtx reg, use_pat;
10722 if (TARGET_SHMEDIA)
10724 /* On SHmedia, if the dependence is an anti-dependence or
10725 output-dependence, there is no cost. */
10726 if (REG_NOTE_KIND (link) != 0)
10728 /* However, dependencies between target register loads and
10729 uses of the register in a subsequent block that are separated
10730 by a conditional branch are not modelled - we have to make do with
10731 the anti-dependency between the target register load and the
10732 conditional branch that ends the current block. */
10733 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10734 && GET_CODE (PATTERN (dep_insn)) == SET
10735 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10736 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10737 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10739 int orig_cost = cost;
10740 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10741 rtx target = ((! note
10742 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
10743 ? insn : JUMP_LABEL (insn));
10744 /* On the likely path, the branch costs 1, on the unlikely path,
10745 it costs 3. */
10746 cost--;
10748 target = next_active_insn (target);
10749 while (target && ! flow_dependent_p (target, dep_insn)
10750 && --cost > 0);
10751 /* If two branches are executed in immediate succession, with the
10752 first branch properly predicted, this causes a stall at the
10753 second branch, hence we won't need the target for the
10754 second branch for two cycles after the launch of the first
10755 branch. */
10756 if (cost > orig_cost - 2)
10757 cost = orig_cost - 2;
10759 else
10760 cost = 0;
10763 else if (get_attr_is_mac_media (insn)
10764 && get_attr_is_mac_media (dep_insn))
10765 cost = 1;
10767 else if (! reload_completed
10768 && GET_CODE (PATTERN (insn)) == SET
10769 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10770 && GET_CODE (PATTERN (dep_insn)) == SET
10771 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10772 && cost < 4)
10773 cost = 4;
10774 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10775 that is needed at the target. */
10776 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10777 && ! flow_dependent_p (insn, dep_insn))
10778 cost--;
10780 else if (REG_NOTE_KIND (link) == 0)
10782 enum attr_type type;
10783 rtx dep_set;
10785 if (recog_memoized (insn) < 0
10786 || recog_memoized (dep_insn) < 0)
10787 return cost;
10789 dep_set = single_set (dep_insn);
10791 /* The latency that we specify in the scheduling description refers
10792 to the actual output, not to an auto-increment register; for that,
10793 the latency is one. */
10794 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10796 rtx set = single_set (insn);
10798 if (set
10799 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10800 && (!MEM_P (SET_DEST (set))
10801 || !reg_mentioned_p (SET_DEST (dep_set),
10802 XEXP (SET_DEST (set), 0))))
10803 cost = 1;
10805 /* The only input for a call that is timing-critical is the
10806 function's address. */
10807 if (CALL_P (insn))
10809 rtx call = get_call_rtx_from (insn);
10810 if (call
10811 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10812 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10813 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10814 cost -= TARGET_SH4_300 ? 3 : 6;
10816 /* Likewise, the most timing critical input for an sfuncs call
10817 is the function address. However, sfuncs typically start
10818 using their arguments pretty quickly.
10819 Assume a four cycle delay for SH4 before they are needed.
10820 Cached ST40-300 calls are quicker, so assume only a one
10821 cycle delay there.
10822 ??? Maybe we should encode the delays till input registers
10823 are needed by sfuncs into the sfunc call insn. */
10824 /* All sfunc calls are parallels with at least four components.
10825 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10826 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10827 && XVECLEN (PATTERN (insn), 0) >= 4
10828 && (reg = sfunc_uses_reg (insn)))
10830 if (! reg_set_p (reg, dep_insn))
10831 cost -= TARGET_SH4_300 ? 1 : 4;
10833 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10835 enum attr_type dep_type = get_attr_type (dep_insn);
10837 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10838 cost--;
10839 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10840 && (type = get_attr_type (insn)) != TYPE_CALL
10841 && type != TYPE_SFUNC)
10842 cost--;
10843 /* When the preceding instruction loads the shift amount of
10844 the following SHAD/SHLD, the latency of the load is increased
10845 by 1 cycle. */
10846 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10847 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10848 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10849 XEXP (SET_SRC (single_set (insn)),
10850 1)))
10851 cost++;
10852 /* When an LS group instruction with a latency of less than
10853 3 cycles is followed by a double-precision floating-point
10854 instruction, FIPR, or FTRV, the latency of the first
10855 instruction is increased to 3 cycles. */
10856 else if (cost < 3
10857 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10858 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10859 cost = 3;
10860 /* The lsw register of a double-precision computation is ready one
10861 cycle earlier. */
10862 else if (reload_completed
10863 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10864 && (use_pat = single_set (insn))
10865 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10866 SET_SRC (use_pat)))
10867 cost -= 1;
10869 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10870 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10871 cost -= 1;
10873 else if (TARGET_SH4_300)
10875 /* Stores need their input register two cycles later. */
10876 if (dep_set && cost >= 1
10877 && ((type = get_attr_type (insn)) == TYPE_STORE
10878 || type == TYPE_PSTORE
10879 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10881 rtx set = single_set (insn);
10883 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10884 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10886 cost -= 2;
10887 /* But don't reduce the cost below 1 if the address depends
10888 on a side effect of dep_insn. */
10889 if (cost < 1
10890 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10891 cost = 1;
10896 /* An anti-dependence penalty of two applies if the first insn is a double
10897 precision fadd / fsub / fmul. */
10898 else if (!TARGET_SH4_300
10899 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10900 && recog_memoized (dep_insn) >= 0
10901 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10902 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10903 /* A lot of alleged anti-flow dependences are fake,
10904 so check this one is real. */
10905 && flow_dependent_p (dep_insn, insn))
10906 cost = 2;
10908 return cost;
10911 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10912 if DEP_INSN is anti-flow dependent on INSN. */
10913 static bool
10914 flow_dependent_p (rtx insn, rtx dep_insn)
10916 rtx tmp = PATTERN (insn);
10918 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10919 return tmp == NULL_RTX;
10922 /* A helper function for flow_dependent_p called through note_stores. */
10923 static void
10924 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10926 rtx * pinsn = (rtx *) data;
10928 if (*pinsn && reg_referenced_p (x, *pinsn))
10929 *pinsn = NULL_RTX;
10932 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10933 'special function' patterns (type sfunc) that clobber pr, but that
10934 do not look like function calls to leaf_function_p. Hence we must
10935 do this extra check. */
10936 static int
10937 sh_pr_n_sets (void)
10939 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10942 /* Return where to allocate pseudo for a given hard register initial
10943 value. */
10944 static rtx
10945 sh_allocate_initial_value (rtx hard_reg)
10947 rtx x;
10949 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10951 if (crtl->is_leaf
10952 && ! sh_pr_n_sets ()
10953 && ! (TARGET_SHCOMPACT
10954 && ((crtl->args.info.call_cookie
10955 & ~ CALL_COOKIE_RET_TRAMP (1))
10956 || crtl->saves_all_registers)))
10957 x = hard_reg;
10958 else
10959 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10961 else
10962 x = NULL_RTX;
10964 return x;
10967 /* This function returns "2" to indicate dual issue for the SH4
10968 processor. To be used by the DFA pipeline description. */
10969 static int
10970 sh_issue_rate (void)
10972 if (TARGET_SUPERSCALAR)
10973 return 2;
10974 else
10975 return 1;
10978 /* Functions for ready queue reordering for sched1. */
10980 /* Get weight for mode for a set x. */
10981 static short
10982 find_set_regmode_weight (rtx x, enum machine_mode mode)
10984 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10985 return 1;
10986 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10988 if (REG_P (SET_DEST (x)))
10990 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10991 return 1;
10992 else
10993 return 0;
10995 return 1;
10997 return 0;
11000 /* Get regmode weight for insn. */
11001 static short
11002 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
11004 short reg_weight = 0;
11005 rtx x;
11007 /* Increment weight for each register born here. */
11008 x = PATTERN (insn);
11009 reg_weight += find_set_regmode_weight (x, mode);
11010 if (GET_CODE (x) == PARALLEL)
11012 int j;
11013 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11015 x = XVECEXP (PATTERN (insn), 0, j);
11016 reg_weight += find_set_regmode_weight (x, mode);
11019 /* Decrement weight for each register that dies here. */
11020 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11022 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11024 rtx note = XEXP (x, 0);
11025 if (REG_P (note) && GET_MODE (note) == mode)
11026 reg_weight--;
11029 return reg_weight;
11032 /* Calculate regmode weights for all insns of a basic block. */
11033 static void
11034 find_regmode_weight (basic_block b, enum machine_mode mode)
11036 rtx insn, next_tail, head, tail;
11038 get_ebb_head_tail (b, b, &head, &tail);
11039 next_tail = NEXT_INSN (tail);
11041 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11043 /* Handle register life information. */
11044 if (!INSN_P (insn))
11045 continue;
11047 if (mode == SFmode)
11048 INSN_REGMODE_WEIGHT (insn, mode) =
11049 find_insn_regmode_weight (insn, mode)
11050 + 2 * find_insn_regmode_weight (insn, DFmode);
11051 else if (mode == SImode)
11052 INSN_REGMODE_WEIGHT (insn, mode) =
11053 find_insn_regmode_weight (insn, mode)
11054 + 2 * find_insn_regmode_weight (insn, DImode);
11058 /* Comparison function for ready queue sorting. */
11059 static int
11060 rank_for_reorder (const void *x, const void *y)
11062 rtx tmp = *(const rtx *) y;
11063 rtx tmp2 = *(const rtx *) x;
11065 /* The insn in a schedule group should be issued first. */
11066 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11067 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11069 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11070 minimizes instruction movement, thus minimizing sched's effect on
11071 register pressure. */
11072 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11075 /* Resort the array A, in which only the element at index N may be out of order. */
11076 static void
11077 swap_reorder (rtx *a, int n)
11079 rtx insn = a[n - 1];
11080 int i = n - 2;
11082 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11084 a[i + 1] = a[i];
11085 i -= 1;
11087 a[i + 1] = insn;
11090 /* Sort the ready list by ascending priority. */
11091 static void
11092 ready_reorder (rtx *ready, int nready)
11094 if (nready == 2)
11095 swap_reorder (ready, nready);
11096 else if (nready > 2)
11097 qsort (ready, nready, sizeof (rtx), rank_for_reorder);
11100 /* Count life regions of r0 for a block. */
11101 static int
11102 find_r0_life_regions (basic_block b)
11104 rtx end, insn;
11105 rtx pset;
11106 rtx r0_reg;
11107 int live;
11108 int set;
11109 int death = 0;
11111 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11113 set = 1;
11114 live = 1;
11116 else
11118 set = 0;
11119 live = 0;
11122 insn = BB_HEAD (b);
11123 end = BB_END (b);
11124 r0_reg = gen_rtx_REG (SImode, R0_REG);
11125 while (1)
11127 if (INSN_P (insn))
11129 if (find_regno_note (insn, REG_DEAD, R0_REG))
11131 death++;
11132 live = 0;
11134 if (!live
11135 && (pset = single_set (insn))
11136 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11137 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11139 set++;
11140 live = 1;
11143 if (insn == end)
11144 break;
11145 insn = NEXT_INSN (insn);
11147 return set - death;
11150 /* Calculate regmode weights for all insns of all basic blocks. */
11151 static void
11152 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11153 int verbose ATTRIBUTE_UNUSED,
11154 int old_max_uid)
11156 basic_block b;
11158 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11159 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11160 r0_life_regions = 0;
11162 FOR_EACH_BB_REVERSE (b)
11164 find_regmode_weight (b, SImode);
11165 find_regmode_weight (b, SFmode);
11166 if (!reload_completed)
11167 r0_life_regions += find_r0_life_regions (b);
11170 CURR_REGMODE_PRESSURE (SImode) = 0;
11171 CURR_REGMODE_PRESSURE (SFmode) = 0;
11174 /* Cleanup. */
11175 static void
11176 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11177 int verbose ATTRIBUTE_UNUSED)
11179 if (regmode_weight[0])
11181 free (regmode_weight[0]);
11182 regmode_weight[0] = NULL;
11184 if (regmode_weight[1])
11186 free (regmode_weight[1]);
11187 regmode_weight[1] = NULL;
11191 /* The supported scalar modes differ from the default version in TImode
11192 for 32-bit SHMEDIA. */
11193 static bool
11194 sh_scalar_mode_supported_p (enum machine_mode mode)
11196 if (TARGET_SHMEDIA32 && mode == TImode)
11197 return false;
11199 return default_scalar_mode_supported_p (mode);
11202 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11203 keep count of register pressures on SImode and SFmode. */
11204 static int
11205 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11206 int sched_verbose ATTRIBUTE_UNUSED,
11207 rtx insn,
11208 int can_issue_more)
11210 if (GET_CODE (PATTERN (insn)) != USE
11211 && GET_CODE (PATTERN (insn)) != CLOBBER)
11212 cached_can_issue_more = can_issue_more - 1;
11213 else
11214 cached_can_issue_more = can_issue_more;
11216 if (reload_completed)
11217 return cached_can_issue_more;
11219 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11220 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11222 return cached_can_issue_more;
11225 static void
11226 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11227 int verbose ATTRIBUTE_UNUSED,
11228 int veclen ATTRIBUTE_UNUSED)
11230 CURR_REGMODE_PRESSURE (SImode) = 0;
11231 CURR_REGMODE_PRESSURE (SFmode) = 0;
11234 /* Some magic numbers. */
11235 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11236 functions that already have high pressure on r0. */
11237 #define R0_MAX_LIFE_REGIONS 2
11238 /* Register Pressure thresholds for SImode and SFmode registers. */
11239 #define SIMODE_MAX_WEIGHT 5
11240 #define SFMODE_MAX_WEIGHT 10
11242 /* Return true if the pressure is high for MODE. */
11243 static bool
11244 high_pressure (enum machine_mode mode)
11246 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11247 functions that already have high pressure on r0. */
11248 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11249 return true;
11251 if (mode == SFmode)
11252 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11253 else
11254 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11257 /* Reorder ready queue if register pressure is high. */
11258 static int
11259 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11260 int sched_verbose ATTRIBUTE_UNUSED,
11261 rtx *ready,
11262 int *n_readyp,
11263 int clock_var ATTRIBUTE_UNUSED)
11265 if (reload_completed)
11266 return sh_issue_rate ();
11268 if (high_pressure (SFmode) || high_pressure (SImode))
11270 ready_reorder (ready, *n_readyp);
11273 return sh_issue_rate ();
11276 /* Skip cycles if the current register pressure is high. */
11277 static int
11278 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11279 int sched_verbose ATTRIBUTE_UNUSED,
11280 rtx *ready ATTRIBUTE_UNUSED,
11281 int *n_readyp ATTRIBUTE_UNUSED,
11282 int clock_var ATTRIBUTE_UNUSED)
11284 if (reload_completed)
11285 return cached_can_issue_more;
11287 if (high_pressure(SFmode) || high_pressure (SImode))
11288 skip_cycles = 1;
11290 return cached_can_issue_more;
11293 /* Skip cycles without sorting the ready queue. This will move insns from
11294 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11295 queue by sh_reorder. */
11297 /* Generally, skipping this many cycles is sufficient for all insns to move
11298 from Q -> R. */
11299 #define MAX_SKIPS 8
11301 static int
11302 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11303 int sched_verbose ATTRIBUTE_UNUSED,
11304 rtx insn ATTRIBUTE_UNUSED,
11305 int last_clock_var,
11306 int clock_var,
11307 int *sort_p)
11309 if (reload_completed)
11310 return 0;
11312 if (skip_cycles)
11314 if ((clock_var - last_clock_var) < MAX_SKIPS)
11316 *sort_p = 0;
11317 return 1;
11319 /* If this is the last cycle we are skipping, allow reordering of R. */
11320 if ((clock_var - last_clock_var) == MAX_SKIPS)
11322 *sort_p = 1;
11323 return 1;
11327 skip_cycles = 0;
11329 return 0;
11332 /* SHmedia requires registers for branches, so we can't generate new
11333 branches past reload. */
11334 static bool
11335 sh_cannot_modify_jumps_p (void)
11337 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11340 static reg_class_t
11341 sh_target_reg_class (void)
11343 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11346 static bool
11347 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11349 if (! shmedia_space_reserved_for_target_registers)
11350 return 0;
11351 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11352 return 0;
11354 HARD_REG_SET dummy;
11355 if (calc_live_regs (&dummy) >= 6 * 8)
11356 return 1;
11357 return 0;
11360 static bool
11361 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11363 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11367 /* On the SH1..SH4, the trampoline looks like
11368 2 0002 D202 mov.l l2,r2
11369 1 0000 D301 mov.l l1,r3
11370 3 0004 422B jmp @r2
11371 4 0006 0009 nop
11372 5 0008 00000000 l1: .long area
11373 6 000c 00000000 l2: .long function
11375 SH5 (compact) uses r1 instead of r3 for the static chain. */
11378 /* Emit RTL insns to initialize the variable parts of a trampoline.
11379 FNADDR is an RTX for the address of the function's pure code.
11380 CXT is an RTX for the static chain value for the function. */
11381 static void
11382 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11384 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11385 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11387 if (TARGET_SHMEDIA64)
11389 rtx tramp_templ;
11390 int fixed_len;
11392 rtx movi1 = GEN_INT (0xcc000010);
11393 rtx shori1 = GEN_INT (0xc8000010);
11394 rtx src, dst;
11396 /* The following trampoline works within a +- 128 KB range for cxt:
11397 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11398 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11399 gettr tr1,r1; blink tr0,r63 */
11400 /* Address rounding makes it hard to compute the exact bounds of the
11401 offset for this trampoline, but we have a rather generous offset
11402 range, so frame_offset should do fine as an upper bound. */
11403 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11405 /* ??? could optimize this trampoline initialization
11406 by writing DImode words with two insns each. */
11407 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11408 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11409 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11410 insn = gen_rtx_AND (DImode, insn, mask);
11411 /* Or in ptb/u .,tr1 pattern */
11412 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11413 insn = force_operand (insn, NULL_RTX);
11414 insn = gen_lowpart (SImode, insn);
11415 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11416 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11417 insn = gen_rtx_AND (DImode, insn, mask);
11418 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11419 insn = gen_lowpart (SImode, insn);
11420 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11421 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11422 insn = gen_rtx_AND (DImode, insn, mask);
11423 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11424 insn = gen_lowpart (SImode, insn);
11425 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11426 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11427 insn = gen_rtx_AND (DImode, insn, mask);
11428 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11429 insn = gen_lowpart (SImode, insn);
11430 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11431 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11432 insn = gen_rtx_AND (DImode, insn, mask);
11433 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11434 insn = gen_lowpart (SImode, insn);
11435 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11436 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11437 GEN_INT (0x6bf10600));
11438 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11439 GEN_INT (0x4415fc10));
11440 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11441 GEN_INT (0x4401fff0));
11442 emit_insn (gen_ic_invalidate_line (tramp));
11443 return;
11445 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11446 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11448 tramp_templ = gen_datalabel_ref (tramp_templ);
11449 dst = tramp_mem;
11450 src = gen_const_mem (BLKmode, tramp_templ);
11451 set_mem_align (dst, 256);
11452 set_mem_align (src, 64);
11453 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11455 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11456 emit_move_insn (adjust_address (tramp_mem, Pmode,
11457 fixed_len + GET_MODE_SIZE (Pmode)),
11458 cxt);
11459 emit_insn (gen_ic_invalidate_line (tramp));
11460 return;
11462 else if (TARGET_SHMEDIA)
11464 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11465 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11466 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11467 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11468 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11469 rotated 10 right, and the higher 16 bits of every 32 selected. */
11470 rtx movishori
11471 = force_reg (V2HImode, (simplify_gen_subreg
11472 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11473 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11474 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11476 fnaddr = force_reg (SImode, fnaddr);
11477 cxt = force_reg (SImode, cxt);
11478 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11479 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11480 movishori));
11481 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11482 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11483 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11484 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11485 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11486 gen_rtx_SUBREG (V2HImode, cxt, 0),
11487 movishori));
11488 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11489 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11490 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11491 if (TARGET_LITTLE_ENDIAN)
11493 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11494 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11496 else
11498 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11499 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11501 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11502 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11503 emit_insn (gen_ic_invalidate_line (tramp));
11504 return;
11506 else if (TARGET_SHCOMPACT)
11508 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11509 return;
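/* Plain SH1..SH4 case: materialize the code sequence from the layout comment
   above.  The two SImode constants below pack the 16-bit opcodes for the two
   mov.l insns (0xd202 / 0xd301), jmp @r2 (0x422b) and nop (0x0009), with the
   halfword order chosen to match the target endianness; the static chain and
   the function address follow at offsets 8 and 12, where the pc-relative
   mov.l insns pick them up. */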
11511 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11512 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11513 SImode));
11514 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11515 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11516 SImode));
11517 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11518 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11519 if (TARGET_HARD_SH4 || TARGET_SH5)
11521 if (!TARGET_INLINE_IC_INVALIDATE
11522 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11523 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11524 FUNCTION_ORDINARY),
11525 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11526 else
11527 emit_insn (gen_ic_invalidate_line (tramp));
11531 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11532 static rtx
11533 sh_trampoline_adjust_address (rtx tramp)
11535 if (TARGET_SHMEDIA)
11536 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11537 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11538 return tramp;
11541 /* FIXME: This is overly conservative. A SHcompact function that
11542 receives arguments ``by reference'' will have them stored in its
11543 own stack frame, so it must not pass pointers or references to
11544 these arguments to other functions by means of sibling calls. */
11545 /* If PIC, we cannot make sibling calls to global functions
11546 because the PLT requires r12 to be live. */
11547 static bool
11548 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11550 return (1
11551 && (! TARGET_SHCOMPACT
11552 || crtl->args.info.stack_regs == 0)
11553 && ! sh_cfun_interrupt_handler_p ()
11554 && (! flag_pic
11555 || (decl && ! TREE_PUBLIC (decl))
11556 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11559 /* Machine specific built-in functions. */
11561 struct builtin_description
11563 bool (* const is_enabled) (void);
11564 const enum insn_code icode;
11565 const char *const name;
11566 int signature;
11567 tree fndecl;
11570 static bool
11571 shmedia_builtin_p (void)
11573 return TARGET_SHMEDIA;
11576 /* This function can be used if there are any built-ins that are not for
11577 SHmedia. It's commented out to avoid the defined-but-unused warning.
11578 static bool
11579 sh1_builtin_p (void)
11581 return TARGET_SH1;
11582 }
11583 */
11585 /* Describe the number and signedness of arguments; arg[0] == result
11586 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11587 /* 9: 64-bit pointer, 10: 32-bit pointer */
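/* For example, SH_BLTIN_SH_HI below is { 4, 4, 1 }: a result and first
   argument whose signedness is taken from the insn operand modes, plus an
   unsigned shift-count argument.  A 0 in slot 0 (as in SH_BLTIN_PSSV) means
   the built-in returns void. */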
11588 static const char signature_args[][4] =
11590 #define SH_BLTIN_V2SI2 0
11591 { 4, 4 },
11592 #define SH_BLTIN_V4HI2 1
11593 { 4, 4 },
11594 #define SH_BLTIN_V2SI3 2
11595 { 4, 4, 4 },
11596 #define SH_BLTIN_V4HI3 3
11597 { 4, 4, 4 },
11598 #define SH_BLTIN_V8QI3 4
11599 { 4, 4, 4 },
11600 #define SH_BLTIN_MAC_HISI 5
11601 { 1, 4, 4, 1 },
11602 #define SH_BLTIN_SH_HI 6
11603 { 4, 4, 1 },
11604 #define SH_BLTIN_SH_SI 7
11605 { 4, 4, 1 },
11606 #define SH_BLTIN_V4HI2V2SI 8
11607 { 4, 4, 4 },
11608 #define SH_BLTIN_V4HI2V8QI 9
11609 { 4, 4, 4 },
11610 #define SH_BLTIN_SISF 10
11611 { 4, 2 },
11612 #define SH_BLTIN_LDUA_L 11
11613 { 2, 10 },
11614 #define SH_BLTIN_LDUA_Q 12
11615 { 1, 10 },
11616 #define SH_BLTIN_STUA_L 13
11617 { 0, 10, 2 },
11618 #define SH_BLTIN_STUA_Q 14
11619 { 0, 10, 1 },
11620 #define SH_BLTIN_LDUA_L64 15
11621 { 2, 9 },
11622 #define SH_BLTIN_LDUA_Q64 16
11623 { 1, 9 },
11624 #define SH_BLTIN_STUA_L64 17
11625 { 0, 9, 2 },
11626 #define SH_BLTIN_STUA_Q64 18
11627 { 0, 9, 1 },
11628 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11629 #define SH_BLTIN_2 19
11630 #define SH_BLTIN_SU 19
11631 { 1, 2 },
11632 #define SH_BLTIN_3 20
11633 #define SH_BLTIN_SUS 20
11634 { 2, 2, 1 },
11635 #define SH_BLTIN_PSSV 21
11636 { 0, 8, 2, 2 },
11637 #define SH_BLTIN_XXUU 22
11638 #define SH_BLTIN_UUUU 22
11639 { 1, 1, 1, 1 },
11640 #define SH_BLTIN_PV 23
11641 { 0, 8 },
11642 #define SH_BLTIN_VP 24
11643 { 8, 0 },
11645 /* mcmv: operands considered unsigned. */
11646 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11647 /* mperm: control value considered unsigned int. */
11648 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11649 /* mshards_q: returns signed short. */
11650 /* nsb: takes long long arg, returns unsigned char. */
11651 static struct builtin_description bdesc[] =
11653 { shmedia_builtin_p,
11654 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11655 { shmedia_builtin_p,
11656 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11657 { shmedia_builtin_p,
11658 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11659 { shmedia_builtin_p,
11660 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11661 { shmedia_builtin_p,
11662 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11663 { shmedia_builtin_p,
11664 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11665 { shmedia_builtin_p,
11666 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11667 { shmedia_builtin_p,
11668 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11669 { shmedia_builtin_p,
11670 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11671 { shmedia_builtin_p,
11672 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11673 { shmedia_builtin_p,
11674 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11675 { shmedia_builtin_p,
11676 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11677 { shmedia_builtin_p,
11678 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11679 { shmedia_builtin_p,
11680 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11681 { shmedia_builtin_p,
11682 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11683 { shmedia_builtin_p,
11684 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11685 { shmedia_builtin_p,
11686 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11687 { shmedia_builtin_p,
11688 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11689 { shmedia_builtin_p,
11690 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11691 { shmedia_builtin_p,
11692 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11693 { shmedia_builtin_p,
11694 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11695 { shmedia_builtin_p,
11696 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11697 { shmedia_builtin_p,
11698 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11699 { shmedia_builtin_p,
11700 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11701 { shmedia_builtin_p,
11702 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11703 { shmedia_builtin_p,
11704 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11705 { shmedia_builtin_p,
11706 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11707 { shmedia_builtin_p,
11708 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11709 { shmedia_builtin_p,
11710 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11711 { shmedia_builtin_p,
11712 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11713 { shmedia_builtin_p,
11714 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11715 { shmedia_builtin_p,
11716 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11717 { shmedia_builtin_p,
11718 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11719 { shmedia_builtin_p,
11720 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11721 { shmedia_builtin_p,
11722 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11723 { shmedia_builtin_p,
11724 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11725 { shmedia_builtin_p,
11726 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11727 { shmedia_builtin_p,
11728 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11729 { shmedia_builtin_p,
11730 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11731 { shmedia_builtin_p,
11732 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11733 { shmedia_builtin_p,
11734 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11735 { shmedia_builtin_p,
11736 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11737 { shmedia_builtin_p,
11738 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11739 { shmedia_builtin_p,
11740 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11741 { shmedia_builtin_p,
11742 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11743 { shmedia_builtin_p,
11744 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11745 { shmedia_builtin_p,
11746 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11747 { shmedia_builtin_p,
11748 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11749 { shmedia_builtin_p,
11750 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11751 { shmedia_builtin_p,
11752 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11753 { shmedia_builtin_p,
11754 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11755 { shmedia_builtin_p,
11756 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11757 { shmedia_builtin_p,
11758 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11759 { shmedia_builtin_p,
11760 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11761 { shmedia_builtin_p,
11762 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11763 { shmedia_builtin_p,
11764 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11765 { shmedia_builtin_p,
11766 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11767 { shmedia_builtin_p,
11768 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11769 { shmedia_builtin_p,
11770 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11771 { shmedia_builtin_p,
11772 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11773 { shmedia_builtin_p,
11774 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11775 { shmedia_builtin_p,
11776 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11777 { shmedia_builtin_p,
11778 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11779 { shmedia_builtin_p,
11780 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11781 { shmedia_builtin_p,
11782 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11783 { shmedia_builtin_p,
11784 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11785 { shmedia_builtin_p,
11786 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11787 { shmedia_builtin_p,
11788 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11789 { shmedia_builtin_p,
11790 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11791 { shmedia_builtin_p,
11792 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11793 { shmedia_builtin_p,
11794 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11795 { shmedia_builtin_p,
11796 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11797 { shmedia_builtin_p,
11798 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11799 { shmedia_builtin_p,
11800 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11801 { shmedia_builtin_p,
11802 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11803 { shmedia_builtin_p,
11804 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11805 { shmedia_builtin_p,
11806 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11807 { shmedia_builtin_p,
11808 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11809 { shmedia_builtin_p,
11810 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11811 { shmedia_builtin_p,
11812 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11813 { shmedia_builtin_p,
11814 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11815 { shmedia_builtin_p,
11816 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11817 { shmedia_builtin_p,
11818 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
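/* From user code these are invoked like any other target built-in, e.g.
   (illustratively, assuming a user-declared 8-byte vector-of-QI type "v8qi"):
   v8qi c = __builtin_sh_media_MSHFLO_B (a, b);  */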
11821 static void
11822 sh_init_builtins (void)
11824 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11825 memset (shared, 0, sizeof shared);
11827 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11829 builtin_description* d = &bdesc[di];
11831 if (!d->is_enabled ())
11832 continue;
11834 tree type, arg_type = NULL_TREE;
11835 int signature = d->signature;
11837 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11838 type = shared[signature];
11839 else
11841 int has_result = signature_args[signature][0] != 0;
11842 tree args[3];
11844 if ((signature_args[signature][1] & 8)
11845 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11846 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11847 continue;
11848 if (! TARGET_FPU_ANY
11849 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11850 continue;
11851 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11852 args[i] = NULL_TREE;
11853 for (int i = 3; ; i--)
11855 int arg = signature_args[signature][i];
11856 int opno = i - 1 + has_result;
11858 if (arg & 8)
11859 arg_type = ptr_type_node;
11860 else if (arg)
11861 arg_type = (*lang_hooks.types.type_for_mode)
11862 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11863 else if (i)
11864 continue;
11865 else
11866 arg_type = void_type_node;
11867 if (i == 0)
11868 break;
11869 args[i-1] = arg_type;
11871 type = build_function_type_list (arg_type, args[0], args[1],
11872 args[2], NULL_TREE);
11873 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11874 shared[signature] = type;
11876 d->fndecl =
11877 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11878 NULL, NULL_TREE);
11882 /* Implements target hook vector_mode_supported_p. */
11883 bool
11884 sh_vector_mode_supported_p (enum machine_mode mode)
11886 if (TARGET_FPU_ANY
11887 && ((mode == V2SFmode)
11888 || (mode == V4SFmode)
11889 || (mode == V16SFmode)))
11890 return true;
11892 else if (TARGET_SHMEDIA
11893 && ((mode == V8QImode)
11894 || (mode == V2HImode)
11895 || (mode == V4HImode)
11896 || (mode == V2SImode)))
11897 return true;
11899 return false;
11902 bool
11903 sh_frame_pointer_required (void)
11905 /* If needed, override this in other tm.h files to cope with various OS
11906 lossage requiring a frame pointer. */
11907 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11908 return true;
11910 if (crtl->profile)
11911 return true;
11913 return false;
11916 /* Implements target hook dwarf_calling_convention. Return an enum
11917 of dwarf_calling_convention. */
11919 int sh_dwarf_calling_convention (const_tree func)
11921 if (sh_attr_renesas_p (func))
11922 return DW_CC_GNU_renesas_sh;
11924 return DW_CC_normal;
11927 /* Returns the sh builtin decl for CODE. */
11928 static tree
11929 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11931 if (code >= ARRAY_SIZE (bdesc))
11932 return error_mark_node;
11934 if (!bdesc[code].is_enabled ())
11935 return error_mark_node;
11937 return bdesc[code].fndecl;
11940 /* Expand an expression EXP that calls a built-in function,
11941 with result going to TARGET if that's convenient
11942 (and in mode MODE if that's convenient).
11943 SUBTARGET may be used as the target for computing one of EXP's operands.
11944 IGNORE is nonzero if the value is to be ignored. */
11945 static rtx
11946 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11947 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11949 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11950 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11951 const struct builtin_description *d = &bdesc[fcode];
11952 enum insn_code icode = d->icode;
11953 int signature = d->signature;
11954 int nop = 0;
11955 rtx op[4];
11957 if (signature_args[signature][0])
11959 if (ignore)
11960 return NULL_RTX;
11962 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11963 if (! target || GET_MODE (target) != tmode
11964 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11965 target = gen_reg_rtx (tmode);
11966 op[nop++] = target;
11968 else
11969 target = NULL_RTX;
11971 for (int i = 1; i <= 3; i++, nop++)
11973 tree arg;
11974 enum machine_mode opmode, argmode;
11975 tree optype;
11977 if (! signature_args[signature][i])
11978 break;
11979 arg = CALL_EXPR_ARG (exp, i - 1);
11980 if (arg == error_mark_node)
11981 return const0_rtx;
11982 if (signature_args[signature][i] & 8)
11984 opmode = ptr_mode;
11985 optype = ptr_type_node;
11987 else
11989 opmode = insn_data[icode].operand[nop].mode;
11990 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11992 argmode = TYPE_MODE (TREE_TYPE (arg));
11993 if (argmode != opmode)
11994 arg = build1 (NOP_EXPR, optype, arg);
11995 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11996 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11997 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12000 rtx pat = NULL_RTX;
12002 switch (nop)
12004 case 1:
12005 pat = (*insn_data[d->icode].genfun) (op[0]);
12006 break;
12007 case 2:
12008 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12009 break;
12010 case 3:
12011 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12012 break;
12013 case 4:
12014 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12015 break;
12016 default:
12017 gcc_unreachable ();
12019 if (! pat)
12020 return NULL_RTX;
12021 emit_insn (pat);
12022 return target;
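/* Expand a V2SFmode unary operation as two scalar SFmode operations, one per
   vector element; the const0_rtx / const1_rtx operands handed to the
   unary_sf_op pattern select the element each emitted insn operates on. */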
12025 void
12026 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12028 rtx sel0 = const0_rtx;
12029 rtx sel1 = const1_rtx;
12030 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12031 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12033 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12034 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
12037 void
12038 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12040 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12042 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12043 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12046 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12047 We can allow any mode in any general register. The special registers
12048 only allow SImode. Don't allow any mode in the PR.
12050 We cannot hold DCmode values in the XD registers because alter_reg
12051 handles subregs of them incorrectly. We could work around this by
12052 spacing the XD registers like the DR registers, but this would require
12053 additional memory in every compilation to hold larger register vectors.
12054 We could hold SFmode / SCmode values in XD registers, but that
12055 would require a tertiary reload when reloading from / to memory,
12056 and a secondary reload to reload from / to general regs; that
12057 seems to be a losing proposition.
12059 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12060 it won't be ferried through GP registers first. */
12061 bool
12062 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
12064 if (SPECIAL_REGISTER_P (regno))
12065 return mode == SImode;
12067 if (regno == FPUL_REG)
12068 return (mode == SImode || mode == SFmode);
12070 if (FP_REGISTER_P (regno) && mode == SFmode)
12071 return true;
12073 if (mode == V2SFmode)
12075 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12076 || GENERAL_REGISTER_P (regno)))
12077 return true;
12078 else
12079 return false;
12082 if (mode == V4SFmode)
12084 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12085 || GENERAL_REGISTER_P (regno))
12086 return true;
12087 else
12088 return false;
12091 if (mode == V16SFmode)
12093 if (TARGET_SHMEDIA)
12095 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12096 return true;
12097 else
12098 return false;
12100 else
12101 return regno == FIRST_XD_REG;
12104 if (FP_REGISTER_P (regno))
12106 if (mode == SFmode
12107 || mode == SImode
12108 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12109 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12110 || mode == DCmode
12111 || (TARGET_SHMEDIA
12112 && (mode == DFmode || mode == DImode
12113 || mode == V2SFmode || mode == TImode)))
12114 && ((regno - FIRST_FP_REG) & 1) == 0)
12115 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12116 && ((regno - FIRST_FP_REG) & 3) == 0))
12117 return true;
12118 else
12119 return false;
12122 if (XD_REGISTER_P (regno))
12123 return mode == DFmode;
12125 if (TARGET_REGISTER_P (regno))
12126 return (mode == DImode || mode == SImode || mode == PDImode);
12128 if (regno == PR_REG)
12129 return mode == SImode;
12131 if (regno == FPSCR_REG)
12132 return mode == PSImode;
12134 /* FIXME. This works around PR target/37633 for -O0. */
12135 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12137 unsigned int n = GET_MODE_SIZE (mode) / 8;
12139 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12140 && regno <= FIRST_GENERAL_REG + 14)
12141 return false;
12144 return true;
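/* For example, a DFmode value is allowed in an even-aligned FP register pair
   on SH4 / SH2A (regno - FIRST_FP_REG even), while TImode in FP registers
   additionally requires four-register alignment. */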
12147 /* Return true if a mode change from FROM to TO is invalid for registers
12148 in class RCLASS. */
12149 bool
12150 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12151 enum reg_class rclass)
12153 /* We want to enable the use of SUBREGs as a means to
12154 VEC_SELECT a single element of a vector. */
12156 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12157 This can be problematic when SFmode vector subregs need to be accessed
12158 on the stack with displacement addressing, as it happens with -O0.
12159 Thus we disallow the mode change for -O0. */
12160 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12161 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12163 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12165 if (TARGET_LITTLE_ENDIAN)
12167 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12168 return reg_classes_intersect_p (DF_REGS, rclass);
12170 else
12172 if (GET_MODE_SIZE (from) < 8)
12173 return reg_classes_intersect_p (DF_HI_REGS, rclass);
12176 return false;
12179 /* Return true if registers in machine mode MODE will likely be
12180 allocated to registers in small register classes. */
12181 bool
12182 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12184 return (! TARGET_SHMEDIA);
12187 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12188 that label is used. */
12189 void
12190 sh_mark_label (rtx address, int nuses)
12192 if (GOTOFF_P (address))
12194 /* Extract the label or symbol. */
12195 address = XEXP (address, 0);
12196 if (GET_CODE (address) == PLUS)
12197 address = XEXP (address, 0);
12198 address = XVECEXP (address, 0, 0);
12200 if (GET_CODE (address) == LABEL_REF
12201 && LABEL_P (XEXP (address, 0)))
12202 LABEL_NUSES (XEXP (address, 0)) += nuses;
12205 /* Compute extra cost of moving data between one register class
12206 and another.
12208 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12209 uses this information. Hence, the general register <-> floating point
12210 register information here is not used for SFmode. */
12211 static int
12212 sh_register_move_cost (enum machine_mode mode,
12213 reg_class_t srcclass, reg_class_t dstclass)
12215 if (dstclass == T_REGS || dstclass == PR_REGS)
12216 return 10;
12218 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12219 return 4;
12221 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12222 && REGCLASS_HAS_FP_REG (srcclass)
12223 && REGCLASS_HAS_FP_REG (dstclass))
12224 return 4;
12226 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12227 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12229 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12230 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12231 return 9;
12233 if ((REGCLASS_HAS_FP_REG (dstclass)
12234 && REGCLASS_HAS_GENERAL_REG (srcclass))
12235 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12236 && REGCLASS_HAS_FP_REG (srcclass)))
12238 /* Discourage trying to use fp regs for a pointer. This also
12239 discourages fp regs with SImode because Pmode is an alias
12240 of SImode on this target. See PR target/48596. */
12241 int addend = (mode == Pmode) ? 40 : 0;
12243 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12244 * ((GET_MODE_SIZE (mode) + 7) / 8U));
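/* For example, on a non-SHmedia, non-FMOVD target a DFmode move between the
   two classes costs 12 * ((8 + 7) / 8) = 12, while an SImode value used as a
   pointer (mode == Pmode) costs (12 + 40) * 1 = 52, which all but rules out
   keeping pointer values in FP registers. */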
12247 if ((dstclass == FPUL_REGS
12248 && REGCLASS_HAS_GENERAL_REG (srcclass))
12249 || (srcclass == FPUL_REGS
12250 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12251 return 5;
12253 if ((dstclass == FPUL_REGS
12254 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12255 || (srcclass == FPUL_REGS
12256 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12257 return 7;
12259 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12260 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12261 return 20;
12263 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12264 if (TARGET_SHMEDIA
12265 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12267 if (sh_gettrcost >= 0)
12268 return sh_gettrcost;
12269 else if (!TARGET_PT_FIXED)
12270 return 100;
12273 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12274 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12275 return 4;
12277 if (TARGET_SHMEDIA
12278 || (TARGET_FMOVD
12279 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12280 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12281 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12283 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12286 static rtx
12287 emit_load_ptr (rtx reg, rtx addr)
12289 rtx mem = gen_const_mem (ptr_mode, addr);
12291 if (Pmode != ptr_mode)
12292 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12293 return emit_move_insn (reg, mem);
12296 static void
12297 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12298 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12299 tree function)
12301 CUMULATIVE_ARGS cum;
12302 int structure_value_byref = 0;
12303 rtx this_rtx, this_value, sibcall, insns, funexp;
12304 tree funtype = TREE_TYPE (function);
12305 int simple_add = CONST_OK_FOR_ADD (delta);
12306 int did_load = 0;
12307 rtx scratch0, scratch1, scratch2;
12308 unsigned i;
12310 reload_completed = 1;
12311 epilogue_completed = 1;
12312 crtl->uses_only_leaf_regs = 1;
12314 emit_note (NOTE_INSN_PROLOGUE_END);
12316 /* Find the "this" pointer. We have such a wide range of ABIs for the
12317 SH that it's best to do this completely machine independently.
12318 "this" is passed as first argument, unless a structure return pointer
12319 comes first, in which case "this" comes second. */
12320 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12321 #ifndef PCC_STATIC_STRUCT_RETURN
12322 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12323 structure_value_byref = 1;
12324 #endif /* not PCC_STATIC_STRUCT_RETURN */
12325 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12327 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12329 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12331 this_rtx
12332 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12334 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12335 static chain pointer (even if you can't have nested virtual functions
12336 right now, someone might implement them sometime), and the rest of the
12337 registers are used for argument passing, are callee-saved, or reserved. */
12338 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12339 -ffixed-reg has been used. */
12340 if (! call_used_regs[0] || fixed_regs[0])
12341 error ("r0 needs to be available as a call-clobbered register");
12342 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12343 if (! TARGET_SH5)
12345 if (call_used_regs[1] && ! fixed_regs[1])
12346 scratch1 = gen_rtx_REG (ptr_mode, 1);
12347 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12348 to the location where struct values are to be returned. */
12349 if (call_used_regs[3] && ! fixed_regs[3])
12350 scratch2 = gen_rtx_REG (Pmode, 3);
12352 else if (TARGET_SHMEDIA)
12354 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12355 if (i != REGNO (scratch0) &&
12356 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12358 scratch1 = gen_rtx_REG (ptr_mode, i);
12359 break;
12361 if (scratch1 == scratch0)
12362 error ("need a second call-clobbered general purpose register");
12363 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12364 if (call_used_regs[i] && ! fixed_regs[i])
12366 scratch2 = gen_rtx_REG (Pmode, i);
12367 break;
12369 if (scratch2 == scratch0)
12370 error ("need a call-clobbered target register");
12373 this_value = plus_constant (Pmode, this_rtx, delta);
12374 if (vcall_offset
12375 && (simple_add || scratch0 != scratch1)
12376 && strict_memory_address_p (ptr_mode, this_value))
12378 emit_load_ptr (scratch0, this_value);
12379 did_load = 1;
12382 if (!delta)
12383 ; /* Do nothing. */
12384 else if (simple_add)
12385 emit_move_insn (this_rtx, this_value);
12386 else
12388 emit_move_insn (scratch1, GEN_INT (delta));
12389 emit_insn (gen_add2_insn (this_rtx, scratch1));
12392 if (vcall_offset)
12394 rtx offset_addr;
12396 if (!did_load)
12397 emit_load_ptr (scratch0, this_rtx);
12399 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12400 if (strict_memory_address_p (ptr_mode, offset_addr))
12401 ; /* Do nothing. */
12402 else if (! TARGET_SH5 && scratch0 != scratch1)
12404 /* scratch0 != scratch1, and we have indexed loads. Get better
12405 schedule by loading the offset into r1 and using an indexed
12406 load - then the load of r1 can issue before the load from
12407 (this_rtx + delta) finishes. */
12408 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12409 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12411 else if (CONST_OK_FOR_ADD (vcall_offset))
12413 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12414 offset_addr = scratch0;
12416 else if (scratch0 != scratch1)
12418 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12419 emit_insn (gen_add2_insn (scratch0, scratch1));
12420 offset_addr = scratch0;
12422 else
12423 gcc_unreachable (); /* FIXME */
12424 emit_load_ptr (scratch0, offset_addr);
12426 if (Pmode != ptr_mode)
12427 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12428 emit_insn (gen_add2_insn (this_rtx, scratch0));
12431 /* Generate a tail call to the target function. */
12432 if (! TREE_USED (function))
12434 assemble_external (function);
12435 TREE_USED (function) = 1;
12437 funexp = XEXP (DECL_RTL (function), 0);
12438 /* If the function is overridden, so is the thunk, hence we don't
12439 need GOT addressing even if this is a public symbol. */
12440 #if 0
12441 if (TARGET_SH1 && ! flag_weak)
12442 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12443 else
12444 #endif
12445 if (TARGET_SH2 && flag_pic)
12447 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12448 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12450 else
12452 if (TARGET_SHMEDIA && flag_pic)
12454 funexp = gen_sym2PIC (funexp);
12455 PUT_MODE (funexp, Pmode);
12457 emit_move_insn (scratch2, funexp);
12458 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12459 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12461 sibcall = emit_call_insn (sibcall);
12462 SIBLING_CALL_P (sibcall) = 1;
12463 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12464 emit_barrier ();
12466 /* Run just enough of rest_of_compilation to do scheduling and get
12467 the insns emitted. Note that use_thunk calls
12468 assemble_start_function and assemble_end_function. */
12470 insns = get_insns ();
12472 if (optimize > 0)
12474 if (! cfun->cfg)
12475 init_flow (cfun);
12476 split_all_insns_noflow ();
12479 sh_reorg ();
12480 shorten_branches (insns);
12481 final_start_function (insns, file, 1);
12482 final (insns, file, 1);
12483 final_end_function ();
12485 reload_completed = 0;
12486 epilogue_completed = 0;
12490 rtx function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12492 rtx sym;
12494 /* If this is not an ordinary function, the name usually comes from a
12495 string literal or an sprintf buffer. Make sure we use the same
12496 string consistently, so that cse will be able to unify address loads. */
12497 if (kind != FUNCTION_ORDINARY)
12498 name = IDENTIFIER_POINTER (get_identifier (name));
12499 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12500 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12501 if (flag_pic)
12502 switch (kind)
12504 case FUNCTION_ORDINARY:
12505 break;
12506 case SFUNC_GOT:
12508 rtx reg = target ? target : gen_reg_rtx (Pmode);
12510 emit_insn (gen_symGOT2reg (reg, sym));
12511 sym = reg;
12512 break;
12514 case SFUNC_STATIC:
12516 /* ??? To allow cse to work, we use GOTOFF relocations.
12517 We could add combiner patterns to transform this into
12518 straight pc-relative calls with sym2PIC / bsrf when
12519 label load and function call are still 1:1 and in the
12520 same basic block during combine. */
12521 rtx reg = target ? target : gen_reg_rtx (Pmode);
12523 emit_insn (gen_symGOTOFF2reg (reg, sym));
12524 sym = reg;
12525 break;
12528 if (target && sym != target)
12530 emit_move_insn (target, sym);
12531 return target;
12533 return sym;
12536 /* Find the number of a general purpose register in S. */
12537 static int
12538 scavenge_reg (HARD_REG_SET *s)
12540 int r;
12541 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12542 if (TEST_HARD_REG_BIT (*s, r))
12543 return r;
12544 return -1;
12548 rtx sh_get_pr_initial_val (void)
12550 rtx val;
12552 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12553 PR register on SHcompact, because it might be clobbered by the prologue.
12554 We check first if that is known to be the case. */
12555 if (TARGET_SHCOMPACT
12556 && ((crtl->args.info.call_cookie
12557 & ~ CALL_COOKIE_RET_TRAMP (1))
12558 || crtl->saves_all_registers))
12559 return gen_frame_mem (SImode, return_address_pointer_rtx);
12561 /* If we haven't finished rtl generation, there might be a nonlocal label
12562 that we haven't seen yet.
12563 ??? get_hard_reg_initial_val fails if it is called after register
12564 allocation has started, unless it has been called before for the
12565 same register. And even then, we end up in trouble if we didn't use
12566 the register in the same basic block before. So call
12567 get_hard_reg_initial_val now and wrap it in an unspec if we might
12568 need to replace it. */
12569 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12570 combine can put the pseudo returned by get_hard_reg_initial_val into
12571 instructions that need a general purpose register, which will fail to
12572 be recognized when the pseudo becomes allocated to PR. */
12574 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12575 if (TARGET_SH1)
12576 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12577 return val;
12580 bool
12581 sh_expand_t_scc (rtx operands[])
12583 enum rtx_code code = GET_CODE (operands[1]);
12584 rtx target = operands[0];
12585 rtx op0 = operands[2];
12586 rtx op1 = operands[3];
12587 rtx result = target;
12588 HOST_WIDE_INT val;
12590 if (!REG_P (op0) || REGNO (op0) != T_REG
12591 || !CONST_INT_P (op1))
12592 return false;
12593 if (!REG_P (result))
12594 result = gen_reg_rtx (SImode);
12595 val = INTVAL (op1);
12596 if ((code == EQ && val == 1) || (code == NE && val == 0))
12597 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12598 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12599 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12600 else if (code == EQ || code == NE)
12601 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12602 else
12603 return false;
12604 if (result != target)
12605 emit_move_insn (target, result);
12606 return true;
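/* Note that comparing T with any constant other than 0 or 1 has a known
   result, since T itself is always 0 or 1; the expansion above then simply
   loads the constant 0 (for EQ) or 1 (for NE). */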
12609 /* INSN is an sfunc; return the rtx that describes the address used. */
12610 static rtx
12611 extract_sfunc_addr (rtx insn)
12613 rtx pattern, part = NULL_RTX;
12614 int len, i;
12616 pattern = PATTERN (insn);
12617 len = XVECLEN (pattern, 0);
12618 for (i = 0; i < len; i++)
12620 part = XVECEXP (pattern, 0, i);
12621 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12622 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12623 return XEXP (part, 0);
12625 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12626 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12629 /* Verify that the register in use_sfunc_addr still agrees with the address
12630 used in the sfunc. This prevents fill_slots_from_thread from changing
12631 use_sfunc_addr.
12632 INSN is the use_sfunc_addr instruction, and REG is the register it
12633 guards. */
12634 bool
12635 check_use_sfunc_addr (rtx insn, rtx reg)
12637 /* Search for the sfunc. It should really come right after INSN. */
12638 while ((insn = NEXT_INSN (insn)))
12640 if (LABEL_P (insn) || JUMP_P (insn))
12641 break;
12642 if (! INSN_P (insn))
12643 continue;
12645 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12646 insn = XVECEXP (PATTERN (insn), 0, 0);
12647 if (GET_CODE (PATTERN (insn)) != PARALLEL
12648 || get_attr_type (insn) != TYPE_SFUNC)
12649 continue;
12650 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12652 gcc_unreachable ();
12655 /* This function returns a constant rtx that represents 2**15 / pi in
12656 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
12657 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
12658 static GTY(()) rtx sh_fsca_sf2int_rtx;
12661 rtx sh_fsca_sf2int (void)
12663 if (! sh_fsca_sf2int_rtx)
12665 REAL_VALUE_TYPE rv;
12667 real_from_string (&rv, "10430.378350470453");
12668 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12671 return sh_fsca_sf2int_rtx;
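/* 10430.378350470453 is 2**15 / pi (32768 / 3.14159...); its reciprocal,
   pi / 2**15 = 9.587379924285257e-5, is the constant used by
   sh_fsca_int2sf below. */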
12674 /* This function returns a constant rtx that represents pi / 2**15 in
12675 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction of
12676 a full circle back to an SFmode angle in radians, i.e. 0x10000 maps
12677 to 2*pi. */
12678 static GTY(()) rtx sh_fsca_int2sf_rtx;
12681 rtx sh_fsca_int2sf (void)
12683 if (! sh_fsca_int2sf_rtx)
12685 REAL_VALUE_TYPE rv;
12687 real_from_string (&rv, "9.587379924285257e-5");
12688 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12691 return sh_fsca_int2sf_rtx;
12694 /* Initialize the CUMULATIVE_ARGS structure. */
12695 void
12696 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12697 tree fntype,
12698 rtx libname ATTRIBUTE_UNUSED,
12699 tree fndecl,
12700 signed int n_named_args,
12701 enum machine_mode mode)
12703 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12704 pcum->free_single_fp_reg = 0;
12705 pcum->stack_regs = 0;
12706 pcum->byref_regs = 0;
12707 pcum->byref = 0;
12708 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12710 /* XXX - Should we check TARGET_HITACHI here ??? */
12711 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12713 if (fntype)
12715 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12716 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12717 pcum->prototype_p = prototype_p (fntype);
12718 pcum->arg_count [(int) SH_ARG_INT]
12719 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12721 pcum->call_cookie
12722 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12723 && pcum->arg_count [(int) SH_ARG_INT] == 0
12724 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12725 ? int_size_in_bytes (TREE_TYPE (fntype))
12726 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12727 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12728 == FIRST_RET_REG));
12730 else
12732 pcum->arg_count [(int) SH_ARG_INT] = 0;
12733 pcum->prototype_p = FALSE;
12734 if (mode != VOIDmode)
12736 pcum->call_cookie =
12737 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12738 && GET_MODE_SIZE (mode) > 4
12739 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12741 /* If the default ABI is the Renesas ABI then all library
12742 calls must assume that the library will be using the
12743 Renesas ABI. So if the function would return its result
12744 in memory then we must force the address of this memory
12745 block onto the stack. Ideally we would like to call
12746 targetm.calls.return_in_memory() here but we do not have
12747 the TYPE or the FNDECL available so we synthesize the
12748 contents of that function as best we can. */
12749 pcum->force_mem =
12750 (TARGET_DEFAULT & MASK_HITACHI)
12751 && (mode == BLKmode
12752 || (GET_MODE_SIZE (mode) > 4
12753 && !(mode == DFmode
12754 && TARGET_FPU_DOUBLE)));
12756 else
12758 pcum->call_cookie = 0;
12759 pcum->force_mem = FALSE;
12764 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12765 not enter into CONST_DOUBLE for the replace.
12767 Note that copying is not done so X must not be shared unless all copies
12768 are to be modified.
12770 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12771 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12772 replacements[n*2+1] - and that we take mode changes into account.
12774 If a replacement is ambiguous, return NULL_RTX.
12776 If MODIFY is zero, don't modify any rtl in place,
12777 just return zero or nonzero for failure / success. */
12779 rtx replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12781 int i, j;
12782 const char *fmt;
12784 /* The following prevents loops from occurring when we change a MEM in a
12785 CONST_DOUBLE into the same CONST_DOUBLE. */
12786 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12787 return x;
12789 for (i = n_replacements - 1; i >= 0 ; i--)
12790 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12791 return replacements[i*2+1];
12793 /* Allow this function to make replacements in EXPR_LISTs. */
12794 if (x == NULL_RTX)
12795 return NULL_RTX;
12797 if (GET_CODE (x) == SUBREG)
12799 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12800 n_replacements, modify);
12802 if (CONST_INT_P (new_rtx))
12804 x = simplify_subreg (GET_MODE (x), new_rtx,
12805 GET_MODE (SUBREG_REG (x)),
12806 SUBREG_BYTE (x));
12807 if (! x)
12808 abort ();
12810 else if (modify)
12811 SUBREG_REG (x) = new_rtx;
12813 return x;
12815 else if (REG_P (x))
12817 unsigned regno = REGNO (x);
12818 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12819 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12820 rtx result = NULL_RTX;
12822 for (i = n_replacements - 1; i >= 0; i--)
12824 rtx from = replacements[i*2];
12825 rtx to = replacements[i*2+1];
12826 unsigned from_regno, from_nregs, to_regno, new_regno;
12828 if (!REG_P (from))
12829 continue;
12830 from_regno = REGNO (from);
12831 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12832 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12833 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12835 if (regno < from_regno
12836 || regno + nregs > from_regno + nregs
12837 || !REG_P (to)
12838 || result)
12839 return NULL_RTX;
12840 to_regno = REGNO (to);
12841 if (to_regno < FIRST_PSEUDO_REGISTER)
12843 new_regno = regno + to_regno - from_regno;
12844 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12845 != nregs)
12846 return NULL_RTX;
12847 result = gen_rtx_REG (GET_MODE (x), new_regno);
12849 else if (GET_MODE (x) <= GET_MODE (to))
12850 result = gen_lowpart_common (GET_MODE (x), to);
12851 else
12852 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12855 return result ? result : x;
12857 else if (GET_CODE (x) == ZERO_EXTEND)
12859 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12860 n_replacements, modify);
12862 if (CONST_INT_P (new_rtx))
12864 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12865 new_rtx, GET_MODE (XEXP (x, 0)));
12866 if (! x)
12867 abort ();
12869 else if (modify)
12870 XEXP (x, 0) = new_rtx;
12872 return x;
12875 fmt = GET_RTX_FORMAT (GET_CODE (x));
12876 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12878 rtx new_rtx;
12880 if (fmt[i] == 'e')
12882 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12883 n_replacements, modify);
12884 if (!new_rtx)
12885 return NULL_RTX;
12886 if (modify)
12887 XEXP (x, i) = new_rtx;
12889 else if (fmt[i] == 'E')
12890 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12892 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12893 n_replacements, modify);
12894 if (!new_rtx)
12895 return NULL_RTX;
12896 if (modify)
12897 XVECEXP (x, i, j) = new_rtx;
12901 return x;
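/* Illustrative use (hypothetical registers): to rewrite hard regs 4 and 5
   into 6 and 7 in a single pass over X:
     rtx map[4] = { gen_rtx_REG (SImode, 4), gen_rtx_REG (SImode, 6),
                    gen_rtx_REG (SImode, 5), gen_rtx_REG (SImode, 7) };
     x = replace_n_hard_rtx (x, map, 2, 1);  */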
12905 rtx sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12907 enum rtx_code code = TRUNCATE;
12909 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12911 rtx inner = XEXP (x, 0);
12912 enum machine_mode inner_mode = GET_MODE (inner);
12914 if (inner_mode == mode)
12915 return inner;
12916 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12917 x = inner;
12918 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12919 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12921 code = GET_CODE (x);
12922 x = inner;
12925 return gen_rtx_fmt_e (code, mode, x);
12928 /* Called via for_each_rtx after reload, to clean up truncates of
12929 registers that span multiple actual hard registers. */
12931 int shmedia_cleanup_truncate (rtx *p, void *n_changes)
12933 rtx x = *p, reg;
12935 if (GET_CODE (x) != TRUNCATE)
12936 return 0;
12937 reg = XEXP (x, 0);
12938 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12940 enum machine_mode reg_mode = GET_MODE (reg);
12941 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12942 subreg_lowpart_offset (DImode, reg_mode));
12943 *(int*) n_changes += 1;
12944 return -1;
12946 return 0;
12949 /* Loads and stores depend on the highpart of the address. However,
12950 set_attr_alternative does not give well-defined results before reload,
12951 so we must look at the rtl ourselves to see if any of the feeding
12952 registers is used in a memref.
12954 Called by sh_contains_memref_p via for_each_rtx. */
12955 static int
12956 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12958 return (MEM_P (*loc));
12961 /* Return true iff INSN contains a MEM. */
12962 bool
12963 sh_contains_memref_p (rtx insn)
12965 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12968 /* Return true iff INSN loads a banked register. */
12969 bool
12970 sh_loads_bankedreg_p (rtx insn)
12972 if (GET_CODE (PATTERN (insn)) == SET)
12974 rtx op = SET_DEST (PATTERN(insn));
12975 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12976 return true;
12979 return false;
12982 /* FNADDR is the MEM expression from a call expander. Return an address
12983 to use in an SHmedia insn pattern. */
12985 rtx shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12987 int is_sym;
12989 fnaddr = XEXP (fnaddr, 0);
12990 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12991 if (flag_pic && is_sym)
12993 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12995 rtx reg = gen_reg_rtx (Pmode);
12997 /* We must not use GOTPLT for sibcalls, because PIC_REG
12998 must be restored before the PLT code gets to run. */
12999 if (is_sibcall)
13000 emit_insn (gen_symGOT2reg (reg, fnaddr));
13001 else
13002 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13003 fnaddr = reg;
13005 else
13007 fnaddr = gen_sym2PIC (fnaddr);
13008 PUT_MODE (fnaddr, Pmode);
13011 /* If ptabs might trap, make this visible to the rest of the compiler.
13012 We generally assume that symbols pertain to valid locations, but
13013 it is possible to generate invalid symbols with asm or linker tricks.
13014 In a list of functions where each returns its successor, an invalid
13015 symbol might denote an empty list. */
13016 if (!TARGET_PT_FIXED
13017 && (!is_sym || TARGET_INVALID_SYMBOLS)
13018 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13020 rtx tr = gen_reg_rtx (PDImode);
13022 emit_insn (gen_ptabs (tr, fnaddr));
13023 fnaddr = tr;
13025 else if (! target_reg_operand (fnaddr, Pmode))
13026 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13027 return fnaddr;
13030 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13031 static reg_class_t
13032 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13034 if (rclass == NO_REGS
13035 && TARGET_SHMEDIA
13036 && (CONST_DOUBLE_P (x)
13037 || GET_CODE (x) == SYMBOL_REF
13038 || PIC_ADDR_P (x)))
13039 return GENERAL_REGS;
13041 return rclass;
13044 /* Implement TARGET_SECONDARY_RELOAD. */
13045 static reg_class_t
13046 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13047 enum machine_mode mode, secondary_reload_info *sri)
13048 {
13049 enum reg_class rclass = (enum reg_class) rclass_i;
13051 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13052 && REG_P (XEXP (XEXP (x, 0), 0))
13053 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13054 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13056 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13057 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13059 if (REG_P (x) && REGNO (x) == GBR_REG)
13060 return NO_REGS;
13062 if (in_p)
13063 {
13064 if (REGCLASS_HAS_FP_REG (rclass)
13065 && ! TARGET_SHMEDIA
13066 && immediate_operand ((x), mode)
13067 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13068 && mode == SFmode && fldi_ok ()))
13069 switch (mode)
13070 {
13071 case SFmode:
13072 sri->icode = CODE_FOR_reload_insf__frn;
13073 return NO_REGS;
13074 case DFmode:
13075 sri->icode = CODE_FOR_reload_indf__frn;
13076 return NO_REGS;
13077 case SImode:
13078 /* ??? If we knew that we are in the appropriate mode -
13079 single precision - we could use a reload pattern directly. */
13080 return FPUL_REGS;
13081 default:
13082 abort ();
13083 }
13084 if (rclass == FPUL_REGS
13085 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13086 || REGNO (x) == T_REG))
13087 || GET_CODE (x) == PLUS))
13088 return GENERAL_REGS;
13089 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13090 {
13091 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13092 return GENERAL_REGS;
13093 else if (mode == SFmode)
13094 return FP_REGS;
13095 sri->icode = CODE_FOR_reload_insi__i_fpul;
13096 return NO_REGS;
13097 }
13098 if (rclass == FPSCR_REGS
13099 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13100 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13101 return GENERAL_REGS;
13102 if (REGCLASS_HAS_FP_REG (rclass)
13103 && TARGET_SHMEDIA
13104 && immediate_operand (x, mode)
13105 && x != CONST0_RTX (GET_MODE (x))
13106 && GET_MODE (x) != V4SFmode)
13107 return GENERAL_REGS;
13108 if ((mode == QImode || mode == HImode)
13109 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13110 {
13111 sri->icode = ((mode == QImode)
13112 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13113 return NO_REGS;
13114 }
13115 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13116 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13117 return TARGET_REGS;
13118 } /* end of input-only processing. */
13120 if (((REGCLASS_HAS_FP_REG (rclass)
13121 && (REG_P (x)
13122 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13123 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13124 && TARGET_FMOVD))))
13125 || (REGCLASS_HAS_GENERAL_REG (rclass)
13126 && REG_P (x)
13127 && FP_REGISTER_P (REGNO (x))))
13128 && ! TARGET_SHMEDIA
13129 && (mode == SFmode || mode == SImode))
13130 return FPUL_REGS;
13131 if ((rclass == FPUL_REGS
13132 || (REGCLASS_HAS_FP_REG (rclass)
13133 && ! TARGET_SHMEDIA && mode == SImode))
13134 && (MEM_P (x)
13135 || (REG_P (x)
13136 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13137 || REGNO (x) == T_REG
13138 || system_reg_operand (x, VOIDmode)))))
13139 {
13140 if (rclass == FPUL_REGS)
13141 return GENERAL_REGS;
13142 return FPUL_REGS;
13143 }
13144 if ((rclass == TARGET_REGS
13145 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13146 && !satisfies_constraint_Csy (x)
13147 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13148 return GENERAL_REGS;
13149 if ((rclass == MAC_REGS || rclass == PR_REGS)
13150 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13151 && rclass != REGNO_REG_CLASS (REGNO (x)))
13152 return GENERAL_REGS;
13153 if (rclass != GENERAL_REGS && REG_P (x)
13154 && TARGET_REGISTER_P (REGNO (x)))
13155 return GENERAL_REGS;
13157 /* If we get here, fall back to loading the FPUL register through general registers.
13158 This case can happen when movsi_ie insn is picked initially to
13159 load/store the FPUL register from/to another register, and then the
13160 other register is allocated on the stack. */
13161 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13162 return GENERAL_REGS;
13164 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13165 the other operand.
13166 On SH2A we could also just leave it alone here, which would result in a
13167 4 byte move insn being generated instead. However, for this to work
13168 the insns must have the appropriate alternatives. */
13169 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13170 && satisfies_constraint_Sdd (x)
13171 && sh_disp_addr_displacement (x)
13172 <= sh_max_mov_insn_displacement (mode, false))
13173 return R0_REGS;
13175 /* When reload is trying to address a QImode or HImode subreg on the stack,
13176 force any subreg byte into R0_REGS, as this is going to become a
13177 displacement address.
13178 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13179 is on the stack, the memref to it might already require a displacement
13180 and that has to be added to the final address. At this point we don't
13181 know the cumulative displacement so we assume the worst case. */
13182 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13183 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13184 return R0_REGS;
13186 return NO_REGS;
13187 }
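/* Adjust the fixed / call-used register sets and some register class
   contents for the selected SH variant; presumably wired up as the
   TARGET_CONDITIONAL_REGISTER_USAGE hook. */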
13189 static void
13190 sh_conditional_register_usage (void)
13191 {
13192 int regno;
13193 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13194 if (! VALID_REGISTER_P (regno))
13195 fixed_regs[regno] = call_used_regs[regno] = 1;
13196 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13197 if (TARGET_SH5)
13198 {
13199 call_used_regs[FIRST_GENERAL_REG + 8]
13200 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13201 call_really_used_regs[FIRST_GENERAL_REG + 8]
13202 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13203 }
13204 if (TARGET_SHMEDIA)
13205 {
13206 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13207 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13208 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13209 }
13210 if (flag_pic)
13211 {
13212 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13213 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13214 }
13215 /* Renesas saves and restores mac registers on call. */
13216 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13217 {
13218 call_really_used_regs[MACH_REG] = 0;
13219 call_really_used_regs[MACL_REG] = 0;
13220 }
13221 for (regno = FIRST_FP_REG + (TARGET_LITTLE_ENDIAN != 0);
13222 regno <= LAST_FP_REG; regno += 2)
13223 SET_HARD_REG_BIT (reg_class_contents[DF_HI_REGS], regno);
13224 if (TARGET_SHMEDIA)
13225 {
13226 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13227 if (! fixed_regs[regno] && call_really_used_regs[regno])
13228 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13229 }
13230 else
13231 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13232 if (! fixed_regs[regno] && call_really_used_regs[regno])
13233 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13234 }
13236 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13238 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13239 static bool
13240 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13241 {
13242 return (TARGET_SHMEDIA
13243 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13244 || x == CONST0_RTX (mode)
13245 || !TARGET_SHMEDIA_FPU
13246 || TARGET_SHMEDIA64)
13247 : (GET_CODE (x) != CONST_DOUBLE
13248 || mode == DFmode || mode == SFmode
13249 || mode == DImode || GET_MODE (x) == VOIDmode));
13250 }
13252 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
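/* Set up the out-of-line __sync_* libcalls for operand sizes up to
   UNITS_PER_WORD; presumably installed as the TARGET_INIT_LIBFUNCS hook. */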
13254 static void
13255 sh_init_sync_libfuncs (void)
13256 {
13257 init_sync_libfuncs (UNITS_PER_WORD);
13258 }
13260 /* Return true if it is appropriate to emit `ret' instructions in the
13261 body of a function. */
13262 bool
13263 sh_can_use_simple_return_p (void)
13264 {
13265 HARD_REG_SET live_regs_mask;
13266 int d;
13268 /* Some targets require special return insns. */
13269 if (TARGET_SHMEDIA
13270 || (TARGET_SHCOMPACT
13271 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13272 return false;
13274 if (! reload_completed || frame_pointer_needed)
13275 return false;
13277 /* Moving the prologue around doesn't reduce the size. */
13278 if (optimize_function_for_size_p (cfun))
13279 return false;
13281 /* Finally, allow for pr save. */
13282 d = calc_live_regs (&live_regs_mask);
13284 if (rounded_frame_size (d) > 4)
13285 return false;
13287 return true;
13288 }
13290 /*------------------------------------------------------------------------------
13291 Address mode optimization support code.
13292 */
13294 typedef HOST_WIDE_INT disp_t;
13295 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13296 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
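/* Sentinel displacement: a base_reg_disp whose displacement equals
   INVALID_DISP carries no usable information (see is_reg / is_disp below). */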
13297 static const disp_t INVALID_DISP = MAX_DISP;
13299 /* A memory reference which is described by a base register and a
13300 displacement. */
13301 class base_reg_disp
13302 {
13303 public:
13304 base_reg_disp (rtx br, disp_t d);
13306 bool is_reg (void) const;
13307 bool is_disp (void) const;
13308 rtx reg (void) const;
13309 disp_t disp (void) const;
13311 private:
13312 rtx reg_;
13313 disp_t disp_;
13314 };
13316 inline
13317 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13318 : reg_ (br), disp_ (d)
13319 {
13320 }
13322 inline bool
13323 base_reg_disp::is_reg (void) const
13324 {
13325 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13326 }
13328 inline bool
13329 base_reg_disp::is_disp (void) const
13330 {
13331 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13332 }
13334 inline rtx
13335 base_reg_disp::reg (void) const
13336 {
13337 return reg_;
13338 }
13340 inline disp_t
13341 base_reg_disp::disp (void) const
13342 {
13343 return disp_;
13344 }
13346 /* Find the base register and calculate the displacement for a given
13347 address rtx 'x'.
13348 This is done by walking the insn list backwards and following SET insns
13349 that set the value of the specified reg 'x'. */
13350 static base_reg_disp
13351 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13352 {
13353 if (REG_P (x))
13354 {
13355 if (REGNO (x) == GBR_REG)
13356 return base_reg_disp (x, disp);
13358 /* We've reached a hard-reg. This is probably the point where
13359 function args are copied to pseudos. Do not go any further and
13360 stick to the pseudo. If the original mem addr was in a hard reg
13361 from the beginning, it will become the base reg. */
13362 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13363 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13365 /* Try to find the previous insn that sets the reg. */
13366 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13367 i = prev_nonnote_insn (i))
13368 {
13369 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13370 && CALL_P (i))
13371 break;
13373 if (!NONJUMP_INSN_P (i))
13374 continue;
13376 rtx p = PATTERN (i);
13377 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13378 && REGNO (XEXP (p, 0)) == REGNO (x))
13379 {
13380 /* If the recursion can't find out any more details about the
13381 source of the set, then this reg becomes our new base reg. */
13382 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13383 }
13384 }
13386 /* When here, no previous insn was found that sets the reg.
13387 The input reg is already the base reg. */
13388 return base_reg_disp (x, disp);
13389 }
13391 else if (GET_CODE (x) == PLUS)
13392 {
13393 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13394 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13396 /* Either left or right val must be a reg.
13397 We don't handle the case of 'reg + reg' here. */
13398 if (left_val.is_reg () && right_val.is_disp ())
13399 return base_reg_disp (left_val.reg (), left_val.disp ()
13400 + right_val.disp () + disp);
13401 else if (right_val.is_reg () && left_val.is_disp ())
13402 return base_reg_disp (right_val.reg (), right_val.disp ()
13403 + left_val.disp () + disp);
13404 else
13405 return base_reg_disp (base_reg, disp);
13406 }
13408 else if (CONST_INT_P (x))
13409 return base_reg_disp (NULL, disp + INTVAL (x));
13411 /* Didn't find anything useful. */
13412 return base_reg_disp (base_reg, disp);
13413 }
13415 /* Given an insn and a memory operand, try to find an equivalent GBR
13416 based memory address and return the corresponding new memory address.
13417 Return NULL_RTX if not found. */
13418 rtx
13419 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13420 {
13421 if (!MEM_P (mem))
13422 return NULL_RTX;
13424 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13425 if (side_effects_p (XEXP (mem, 0)))
13426 return NULL_RTX;
13428 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13430 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13431 {
13432 rtx disp = GEN_INT (gbr_disp.disp ());
13433 if (gbr_displacement (disp, GET_MODE (mem)))
13434 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13435 }
13437 return NULL_RTX;
13438 }
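/* Sketch of the overall idea (register numbers are made up): for a sequence
   such as
     (set (reg:SI 100) (plus:SI (reg:SI GBR_REG) (const_int 40)))
     (set (reg:SI 101) (plus:SI (reg:SI 100) (const_int 4)))
     ... (mem:SI (reg:SI 101)) ...
   sh_find_base_reg_disp resolves the address to GBR + 44, and if that offset
   passes gbr_displacement for the access mode, the mem address is rewritten
   as (plus:SI (reg:SI GBR_REG) (const_int 44)). */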
13440 /*------------------------------------------------------------------------------
13441 Manual insn combine support code.
13442 */
13444 /* Given a reg rtx and a start insn, try to find the insn that sets the
13445 specified reg by using the specified insn stepping function, such as
13446 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13447 of the reg set. */
13448 set_of_reg
13449 sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx))
13450 {
13451 set_of_reg result;
13452 result.insn = insn;
13453 result.set_rtx = NULL_RTX;
13454 result.set_src = NULL_RTX;
13456 if (!REG_P (reg) || insn == NULL_RTX)
13457 return result;
13459 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13460 result.insn = stepfunc (result.insn))
13461 {
13462 if (BARRIER_P (result.insn))
13463 return result;
13464 if (!NONJUMP_INSN_P (result.insn))
13465 continue;
13466 if (reg_set_p (reg, result.insn))
13467 {
13468 result.set_rtx = set_of (reg, result.insn);
13470 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13471 return result;
13473 result.set_src = XEXP (result.set_rtx, 1);
13474 return result;
13475 }
13476 }
13478 return result;
13479 }
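/* Typical use, as in the functions below (some_reg is just a placeholder):
     set_of_reg s = sh_find_set_of_reg (some_reg, insn, prev_nonnote_insn_bb);
     if (s.set_src != NULL_RTX)
       ... inspect the expression that feeds some_reg ... */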
13481 /* Given an op rtx and an insn, try to find out whether the result of the
13482 specified op consists only of logical operations on T bit stores. */
13483 bool
13484 sh_is_logical_t_store_expr (rtx op, rtx insn)
13485 {
13486 if (!logical_operator (op, SImode))
13487 return false;
13489 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13490 int op_is_t_count = 0;
13492 for (int i = 0; i < 2; ++i)
13493 {
13494 if (t_reg_operand (ops[i], VOIDmode)
13495 || negt_reg_operand (ops[i], VOIDmode))
13496 op_is_t_count++;
13498 else
13499 {
13500 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13501 prev_nonnote_insn_bb);
13502 if (op_set.set_src == NULL_RTX)
13503 continue;
13505 if (t_reg_operand (op_set.set_src, VOIDmode)
13506 || negt_reg_operand (op_set.set_src, VOIDmode)
13507 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13508 op_is_t_count++;
13509 }
13510 }
13512 return op_is_t_count == 2;
13513 }
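/* For example, if r4 and r5 (made-up registers) were both set from the T bit
   (e.g. by movt), an expression such as (ior:SI r4 r5) qualifies, which lets
   the caller drop a following zero extension. */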
13515 /* Given the operand that is extended in a sign/zero extend insn, and the
13516 insn, try to figure out whether the sign/zero extension can be replaced
13517 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13518 NULL_RTX otherwise. */
13519 rtx
13520 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13521 {
13522 if (REG_P (extended_op))
13523 extended_op = extended_op;
13524 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13525 extended_op = SUBREG_REG (extended_op);
13526 else
13527 return NULL_RTX;
13529 /* Reg moves must be of the same mode. */
13530 if (GET_MODE (extended_op) != SImode)
13531 return NULL_RTX;
13533 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13534 if (s.set_src == NULL_RTX)
13535 return NULL_RTX;
13537 if (t_reg_operand (s.set_src, VOIDmode)
13538 || negt_reg_operand (s.set_src, VOIDmode))
13539 return extended_op;
13541 /* If the zero extended reg was formed by a logical operation, check the
13542 operands of the logical operation. If both originated from T bit
13543 stores, the zero extension can be eliminated. */
13544 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13545 return extended_op;
13547 return NULL_RTX;
13548 }
13550 #include "gt-sh.h"