[official-gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2013 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "insn-config.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "flags.h"
30 #include "expr.h"
31 #include "optabs.h"
32 #include "reload.h"
33 #include "function.h"
34 #include "regs.h"
35 #include "hard-reg-set.h"
36 #include "output.h"
37 #include "insn-attr.h"
38 #include "diagnostic-core.h"
39 #include "recog.h"
40 #include "dwarf2.h"
41 #include "tm_p.h"
42 #include "target.h"
43 #include "target-def.h"
44 #include "langhooks.h"
45 #include "basic-block.h"
46 #include "df.h"
47 #include "intl.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "ggc.h"
51 #include "gimple.h"
52 #include "gimplify.h"
53 #include "cfgloop.h"
54 #include "alloc-pool.h"
55 #include "tm-constrs.h"
56 #include "opts.h"
57 #include "tree-pass.h"
58 #include "pass_manager.h"
59 #include "context.h"
61 #include <sstream>
62 #include <vector>
63 #include <algorithm>
65 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
67 /* These are some macros to abstract register modes. */
68 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
69 && ((HOST_WIDE_INT)(VALUE)) <= 511)
71 #define CONST_OK_FOR_ADD(size) \
72 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
73 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
74 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
75 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
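/* For example, GEN_ADD3 (dst, src, GEN_INT (n)) emits an adddi3 on SHMEDIA64
   targets and an addsi3 otherwise, while CONST_OK_FOR_ADD (n) says whether N
   fits the immediate field of that add (a signed 10-bit value on SHmedia,
   otherwise whatever CONST_OK_FOR_I08 accepts).  Callers can thus stay
   agnostic of the pointer-sized mode.  */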
77 /* Used to simplify the logic below. Find the attributes wherever
78 they may be. */
79 #define SH_ATTRIBUTES(decl) \
80 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
81 : DECL_ATTRIBUTES (decl) \
82 ? (DECL_ATTRIBUTES (decl)) \
83 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
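/* I.e. for a type node this yields TYPE_ATTRIBUTES; for a decl it yields the
   decl's own DECL_ATTRIBUTES when present, and the attributes of the decl's
   type otherwise.  */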
85 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
86 int current_function_interrupt;
88 tree sh_deferred_function_attributes;
89 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
91 /* Global variables for machine-dependent things. */
93 /* Which CPU we are scheduling for. */
94 enum processor_type sh_cpu;
96 /* Definitions used in ready queue reordering for first scheduling pass. */
98 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
99 static short *regmode_weight[2];
101 /* Total SFmode and SImode weights of scheduled insns. */
102 static int curr_regmode_pressure[2];
104 /* Number of r0 life regions. */
105 static int r0_life_regions;
107 /* If true, skip cycles for Q -> R movement. */
108 static int skip_cycles = 0;
110 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
111 and returned from sh_reorder2. */
112 static short cached_can_issue_more;
114 /* Unique number for UNSPEC_BBR pattern. */
115 static unsigned int unspec_bbr_uid = 1;
117 /* Provides the class number of the smallest class containing each
118 register number. */
119 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
121 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 FP0_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
154 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
155 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
156 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
157 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
158 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
159 GENERAL_REGS, GENERAL_REGS,
162 char sh_register_names[FIRST_PSEUDO_REGISTER] \
163 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
165 char sh_additional_register_names[ADDREGNAMES_SIZE] \
166 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
167 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
169 int assembler_dialect;
171 static bool shmedia_space_reserved_for_target_registers;
173 static void split_branches (rtx);
174 static int branch_dest (rtx);
175 static void print_slot (rtx);
176 static rtx add_constant (rtx, enum machine_mode, rtx);
177 static void dump_table (rtx, rtx);
178 static bool broken_move (rtx);
179 static bool mova_p (rtx);
180 static rtx find_barrier (int, rtx, rtx);
181 static bool noncall_uses_reg (rtx, rtx, rtx *);
182 static rtx gen_block_redirect (rtx, int, int);
183 static void sh_reorg (void);
184 static void sh_option_override (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET *, int);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static rtx mark_constant_pool_use (rtx);
194 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
195 int, bool *);
196 static tree sh_handle_resbank_handler_attribute (tree *, tree,
197 tree, int, bool *);
198 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
199 tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_print_operand (FILE *, rtx, int);
204 static void sh_print_operand_address (FILE *, rtx);
205 static bool sh_print_operand_punct_valid_p (unsigned char code);
206 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
207 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
208 static void sh_insert_attributes (tree, tree *);
209 static const char *sh_check_pch_target_flags (int);
210 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
211 static int sh_adjust_cost (rtx, rtx, rtx, int);
212 static int sh_issue_rate (void);
213 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
214 static short find_set_regmode_weight (rtx, enum machine_mode);
215 static short find_insn_regmode_weight (rtx, enum machine_mode);
216 static void find_regmode_weight (basic_block, enum machine_mode);
217 static int find_r0_life_regions (basic_block);
218 static void sh_md_init_global (FILE *, int, int);
219 static void sh_md_finish_global (FILE *, int);
220 static int rank_for_reorder (const void *, const void *);
221 static void swap_reorder (rtx *, int);
222 static void ready_reorder (rtx *, int);
223 static bool high_pressure (enum machine_mode);
224 static int sh_reorder (FILE *, int, rtx *, int *, int);
225 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
226 static void sh_md_init (FILE *, int, int);
227 static int sh_variable_issue (FILE *, int, rtx, int);
229 static bool sh_function_ok_for_sibcall (tree, tree);
231 static bool sh_cannot_modify_jumps_p (void);
232 static reg_class_t sh_target_reg_class (void);
233 static bool sh_optimize_target_register_callee_saved (bool);
234 static bool sh_ms_bitfield_layout_p (const_tree);
236 static void sh_init_builtins (void);
237 static tree sh_builtin_decl (unsigned, bool);
238 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
239 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
240 HOST_WIDE_INT, tree);
241 static void sh_file_start (void);
242 static bool flow_dependent_p (rtx, rtx);
243 static void flow_dependent_p_1 (rtx, const_rtx, void *);
244 static int shiftcosts (rtx);
245 static int and_xor_ior_costs (rtx, int);
246 static int addsubcosts (rtx);
247 static int multcosts (rtx);
248 static bool unspec_caller_rtx_p (rtx);
249 static bool sh_cannot_copy_insn_p (rtx);
250 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
251 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
252 static int sh_pr_n_sets (void);
253 static rtx sh_allocate_initial_value (rtx);
254 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
255 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
256 enum machine_mode,
257 struct secondary_reload_info *);
258 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
259 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
260 static rtx sh_delegitimize_address (rtx);
261 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
262 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
263 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
264 static int scavenge_reg (HARD_REG_SET *s);
265 struct save_schedule_s;
266 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
267 struct save_schedule_s *, int);
269 static rtx sh_struct_value_rtx (tree, int);
270 static rtx sh_function_value (const_tree, const_tree, bool);
271 static bool sh_function_value_regno_p (const unsigned int);
272 static rtx sh_libcall_value (enum machine_mode, const_rtx);
273 static bool sh_return_in_memory (const_tree, const_tree);
274 static rtx sh_builtin_saveregs (void);
275 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
276 tree, int *, int);
277 static bool sh_strict_argument_naming (cumulative_args_t);
278 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
279 static tree sh_build_builtin_va_list (void);
280 static void sh_va_start (tree, rtx);
281 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
282 static bool sh_promote_prototypes (const_tree);
283 static enum machine_mode sh_promote_function_mode (const_tree type,
284 enum machine_mode,
285 int *punsignedp,
286 const_tree funtype,
287 int for_return);
288 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
289 const_tree, bool);
290 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
291 const_tree, bool);
292 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
293 tree, bool);
294 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
295 const_tree, bool);
296 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
297 const_tree, bool);
298 static bool sh_scalar_mode_supported_p (enum machine_mode);
299 static int sh_dwarf_calling_convention (const_tree);
300 static void sh_encode_section_info (tree, rtx, int);
301 static bool sh2a_function_vector_p (tree);
302 static void sh_trampoline_init (rtx, tree, rtx);
303 static rtx sh_trampoline_adjust_address (rtx);
304 static void sh_conditional_register_usage (void);
305 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
306 static int mov_insn_size (enum machine_mode, bool);
307 static int max_mov_insn_displacement (enum machine_mode, bool);
308 static int mov_insn_alignment_mask (enum machine_mode, bool);
309 static HOST_WIDE_INT disp_addr_displacement (rtx);
310 static bool sequence_insn_p (rtx);
311 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
312 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
313 enum machine_mode, bool);
314 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
316 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
318 static const struct attribute_spec sh_attribute_table[] =
320 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
321 affects_type_identity } */
322 { "interrupt_handler", 0, 0, true, false, false,
323 sh_handle_interrupt_handler_attribute, false },
324 { "sp_switch", 1, 1, true, false, false,
325 sh_handle_sp_switch_attribute, false },
326 { "trap_exit", 1, 1, true, false, false,
327 sh_handle_trap_exit_attribute, false },
328 { "renesas", 0, 0, false, true, false,
329 sh_handle_renesas_attribute, false },
330 { "trapa_handler", 0, 0, true, false, false,
331 sh_handle_interrupt_handler_attribute, false },
332 { "nosave_low_regs", 0, 0, true, false, false,
333 sh_handle_interrupt_handler_attribute, false },
334 { "resbank", 0, 0, true, false, false,
335 sh_handle_resbank_handler_attribute, false },
336 { "function_vector", 1, 1, true, false, false,
337 sh2a_handle_function_vector_handler_attribute, false },
338 { NULL, 0, 0, false, false, false, NULL, false }
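/* In user code these attributes are typically spelled roughly as follows
   (handler names and argument values here are only placeholders):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void vfn (void) __attribute__ ((function_vector (18)));  (SH2A)  */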
341 /* Initialize the GCC target structure. */
342 #undef TARGET_ATTRIBUTE_TABLE
343 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
345 /* The next two are used for debug info when compiling with -gdwarf. */
346 #undef TARGET_ASM_UNALIGNED_HI_OP
347 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
348 #undef TARGET_ASM_UNALIGNED_SI_OP
349 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
351 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
352 #undef TARGET_ASM_UNALIGNED_DI_OP
353 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
354 #undef TARGET_ASM_ALIGNED_DI_OP
355 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
357 #undef TARGET_OPTION_OVERRIDE
358 #define TARGET_OPTION_OVERRIDE sh_option_override
360 #undef TARGET_PRINT_OPERAND
361 #define TARGET_PRINT_OPERAND sh_print_operand
362 #undef TARGET_PRINT_OPERAND_ADDRESS
363 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
364 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
365 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
366 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
367 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
369 #undef TARGET_ASM_FUNCTION_EPILOGUE
370 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
372 #undef TARGET_ASM_OUTPUT_MI_THUNK
373 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
375 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
376 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
377 hook_bool_const_tree_hwi_hwi_const_tree_true
379 #undef TARGET_ASM_FILE_START
380 #define TARGET_ASM_FILE_START sh_file_start
381 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
382 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
384 #undef TARGET_REGISTER_MOVE_COST
385 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
387 #undef TARGET_INSERT_ATTRIBUTES
388 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
390 #undef TARGET_SCHED_ADJUST_COST
391 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
393 #undef TARGET_SCHED_ISSUE_RATE
394 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
396 /* The next 5 hooks have been implemented to re-enable sched1. With the
397 help of these macros we limit the movement of insns in sched1 in order to
398 reduce the register pressure. The overall idea is to keep count of the SImode
399 and SFmode regs required by already scheduled insns. When these counts
400 cross certain threshold values, priority is given to insns that free registers.
401 The insn that frees registers is most likely to be the insn with the lowest
402 LUID (original insn order), but such an insn may be sitting in the stalled
403 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
404 up to a maximum of 8 so that such insns may move from Q -> R.
406 The hooks are described below:
408 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
409 scheduler; it is called inside the sched_init function just after the
410 find_insn_reg_weights function call. It is used to calculate the SImode
411 and SFmode weights of the insns of each basic block, much like what
412 find_insn_reg_weights does.
413 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
415 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
416 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
417 (Q)->(R).
419 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
420 high, reorder the ready queue so that the insn with the lowest LUID will be
421 issued next.
423 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
424 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
426 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
427 can be returned from TARGET_SCHED_REORDER2.
429 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
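/* Roughly, the resulting flow (using the static helpers defined later in
   this file) is:

     sh_md_init_global      -> fill regmode_weight[] per insn (SImode/SFmode)
     sh_md_init             -> reset CURR_REGMODE_PRESSURE for both modes
     sh_variable_issue      -> account issued insns, cache can_issue_more
     sh_reorder/sh_reorder2 -> when high_pressure (mode) holds, move the
                               lowest-LUID insn to the head of the ready queue
     sh_dfa_new_cycle       -> if sh_reorder2 signalled high pressure, let the
                               DFA skip cycles (bounded as described above).  */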
431 #undef TARGET_SCHED_DFA_NEW_CYCLE
432 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
434 #undef TARGET_SCHED_INIT_GLOBAL
435 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
437 #undef TARGET_SCHED_FINISH_GLOBAL
438 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
440 #undef TARGET_SCHED_VARIABLE_ISSUE
441 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
443 #undef TARGET_SCHED_REORDER
444 #define TARGET_SCHED_REORDER sh_reorder
446 #undef TARGET_SCHED_REORDER2
447 #define TARGET_SCHED_REORDER2 sh_reorder2
449 #undef TARGET_SCHED_INIT
450 #define TARGET_SCHED_INIT sh_md_init
452 #undef TARGET_DELEGITIMIZE_ADDRESS
453 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
455 #undef TARGET_LEGITIMIZE_ADDRESS
456 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
458 #undef TARGET_CANNOT_MODIFY_JUMPS_P
459 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
460 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
461 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
462 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
463 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
464 sh_optimize_target_register_callee_saved
466 #undef TARGET_MS_BITFIELD_LAYOUT_P
467 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
469 #undef TARGET_INIT_BUILTINS
470 #define TARGET_INIT_BUILTINS sh_init_builtins
471 #undef TARGET_BUILTIN_DECL
472 #define TARGET_BUILTIN_DECL sh_builtin_decl
473 #undef TARGET_EXPAND_BUILTIN
474 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
476 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
477 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
479 #undef TARGET_CANNOT_COPY_INSN_P
480 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
481 #undef TARGET_RTX_COSTS
482 #define TARGET_RTX_COSTS sh_rtx_costs
483 #undef TARGET_ADDRESS_COST
484 #define TARGET_ADDRESS_COST sh_address_cost
485 #undef TARGET_ALLOCATE_INITIAL_VALUE
486 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
488 #undef TARGET_MACHINE_DEPENDENT_REORG
489 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
491 #undef TARGET_DWARF_REGISTER_SPAN
492 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
494 #ifdef HAVE_AS_TLS
495 #undef TARGET_HAVE_TLS
496 #define TARGET_HAVE_TLS true
497 #endif
499 #undef TARGET_PROMOTE_PROTOTYPES
500 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
501 #undef TARGET_PROMOTE_FUNCTION_MODE
502 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
504 #undef TARGET_FUNCTION_VALUE
505 #define TARGET_FUNCTION_VALUE sh_function_value
506 #undef TARGET_FUNCTION_VALUE_REGNO_P
507 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
508 #undef TARGET_LIBCALL_VALUE
509 #define TARGET_LIBCALL_VALUE sh_libcall_value
510 #undef TARGET_STRUCT_VALUE_RTX
511 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
512 #undef TARGET_RETURN_IN_MEMORY
513 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
515 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
516 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
517 #undef TARGET_SETUP_INCOMING_VARARGS
518 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
519 #undef TARGET_STRICT_ARGUMENT_NAMING
520 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
521 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
522 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
523 #undef TARGET_MUST_PASS_IN_STACK
524 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
525 #undef TARGET_PASS_BY_REFERENCE
526 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
527 #undef TARGET_CALLEE_COPIES
528 #define TARGET_CALLEE_COPIES sh_callee_copies
529 #undef TARGET_ARG_PARTIAL_BYTES
530 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
531 #undef TARGET_FUNCTION_ARG
532 #define TARGET_FUNCTION_ARG sh_function_arg
533 #undef TARGET_FUNCTION_ARG_ADVANCE
534 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
536 #undef TARGET_BUILD_BUILTIN_VA_LIST
537 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
538 #undef TARGET_EXPAND_BUILTIN_VA_START
539 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
540 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
541 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
543 #undef TARGET_SCALAR_MODE_SUPPORTED_P
544 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
545 #undef TARGET_VECTOR_MODE_SUPPORTED_P
546 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
548 #undef TARGET_CHECK_PCH_TARGET_FLAGS
549 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
551 #undef TARGET_DWARF_CALLING_CONVENTION
552 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
554 #undef TARGET_FRAME_POINTER_REQUIRED
555 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
557 /* Return regmode weight for insn. */
558 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
559 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
561 /* Return current register pressure for regmode. */
562 #define CURR_REGMODE_PRESSURE(MODE)\
563 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
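/* The scheduling hooks below combine these, roughly as
     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
   whenever an insn is scheduled, and compare the running total against a
   threshold in high_pressure ().  */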
565 #undef TARGET_ENCODE_SECTION_INFO
566 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
568 #undef TARGET_SECONDARY_RELOAD
569 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
571 #undef TARGET_PREFERRED_RELOAD_CLASS
572 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
574 #undef TARGET_CONDITIONAL_REGISTER_USAGE
575 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
577 #undef TARGET_LEGITIMATE_ADDRESS_P
578 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
580 #undef TARGET_TRAMPOLINE_INIT
581 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
582 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
583 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
585 #undef TARGET_LEGITIMATE_CONSTANT_P
586 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
588 #undef TARGET_CANONICALIZE_COMPARISON
589 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
591 #undef TARGET_FIXED_CONDITION_CODE_REGS
592 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
594 /* Machine-specific symbol_ref flags. */
595 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
597 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
598 is used by optabs.c atomic op expansion code as well as in sync.md. */
599 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
600 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
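/* I.e. after a successful __atomic_test_and_set the flag byte holds 0x80
   (the value tas.b stores) rather than 1, and generic expansion code is
   told so through this hook.  */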
602 struct gcc_target targetm = TARGET_INITIALIZER;
605 /* Information on the currently selected atomic model.
606 This is initialized in sh_option_override. */
607 static sh_atomic_model selected_atomic_model_;
609 const sh_atomic_model&
610 selected_atomic_model (void)
612 return selected_atomic_model_;
615 static sh_atomic_model
616 parse_validate_atomic_model_option (const char* str)
618 const char* model_names[sh_atomic_model::num_models];
619 model_names[sh_atomic_model::none] = "none";
620 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
621 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
622 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
623 model_names[sh_atomic_model::soft_imask] = "soft-imask";
625 const char* model_cdef_names[sh_atomic_model::num_models];
626 model_cdef_names[sh_atomic_model::none] = "NONE";
627 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
628 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
629 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
630 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
632 sh_atomic_model ret;
633 ret.type = sh_atomic_model::none;
634 ret.name = model_names[sh_atomic_model::none];
635 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
636 ret.strict = false;
637 ret.tcb_gbr_offset = -1;
639 /* Handle empty string as 'none'. */
640 if (str == NULL || *str == '\0')
641 return ret;
643 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
645 std::vector<std::string> tokens;
646 for (std::stringstream ss (str); ss.good (); )
648 tokens.push_back (std::string ());
649 std::getline (ss, tokens.back (), ',');
652 if (tokens.empty ())
653 err_ret ("invalid atomic model option");
655 /* The first token must be the atomic model name. */
657 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
658 if (tokens.front () == model_names[i])
660 ret.type = (sh_atomic_model::enum_type)i;
661 ret.name = model_names[i];
662 ret.cdef_name = model_cdef_names[i];
663 goto got_mode_name;
666 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
667 got_mode_name:;
670 /* Go through the remaining tokens. */
671 for (size_t i = 1; i < tokens.size (); ++i)
673 if (tokens[i] == "strict")
674 ret.strict = true;
675 else if (tokens[i].find ("gbr-offset=") == 0)
677 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
678 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
679 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
680 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
681 "option", offset_str.c_str ());
683 else
684 err_ret ("unknown parameter \"%s\" in atomic model option",
685 tokens[i].c_str ());
688 /* Check that the selection makes sense. */
689 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
690 err_ret ("atomic operations are not supported on SHmedia");
692 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
693 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
694 ret.name);
696 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
697 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
699 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
700 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
702 if (ret.type == sh_atomic_model::soft_tcb
703 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
704 || (ret.tcb_gbr_offset & 3) != 0))
705 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
706 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
707 ret.name);
709 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
710 err_ret ("cannot use atomic model %s in user mode", ret.name);
712 return ret;
714 #undef err_ret
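/* For example, -matomic-model=soft-gusa yields { type = soft_gusa,
   strict = false }, and -matomic-model=soft-tcb,gbr-offset=16,strict yields
   { type = soft_tcb, tcb_gbr_offset = 16, strict = true }; the offset must
   be a multiple of 4 in the range 0-1020, as checked above.  */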
717 /* Register SH specific RTL passes. */
718 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
719 const char* name);
720 static void
721 register_sh_passes (void)
723 if (!TARGET_SH1)
724 return;
726 /* Running the sh_treg_combine pass after ce1 generates better code when
727 comparisons are combined and reg-reg moves are introduced, because
728 reg-reg moves will be eliminated afterwards. However, there are quite
729 a few cases where combine will be unable to fold comparison-related insns,
730 thus for now this is not done:
731 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
732 PASS_POS_INSERT_AFTER, "ce1", 1);
733 */
735 /* Run sh_treg_combine pass after combine but before register allocation. */
736 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
737 PASS_POS_INSERT_AFTER, "split1", 1);
739 /* Run sh_treg_combine pass after register allocation and basic block
740 reordering as this sometimes creates new opportunities. */
741 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
742 PASS_POS_INSERT_AFTER, "split4", 1);
745 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
746 various options, and do some machine dependent initialization. */
747 static void
748 sh_option_override (void)
750 int regno;
752 SUBTARGET_OVERRIDE_OPTIONS;
753 if (optimize > 1 && !optimize_size)
754 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
755 sh_cpu = PROCESSOR_SH1;
756 assembler_dialect = 0;
757 if (TARGET_SH2)
758 sh_cpu = PROCESSOR_SH2;
759 if (TARGET_SH2E)
760 sh_cpu = PROCESSOR_SH2E;
761 if (TARGET_SH2A)
762 sh_cpu = PROCESSOR_SH2A;
763 if (TARGET_SH3)
764 sh_cpu = PROCESSOR_SH3;
765 if (TARGET_SH3E)
766 sh_cpu = PROCESSOR_SH3E;
767 if (TARGET_SH4)
769 assembler_dialect = 1;
770 sh_cpu = PROCESSOR_SH4;
772 if (TARGET_SH4A_ARCH)
774 assembler_dialect = 1;
775 sh_cpu = PROCESSOR_SH4A;
777 if (TARGET_SH5)
779 sh_cpu = PROCESSOR_SH5;
780 target_flags |= MASK_ALIGN_DOUBLE;
781 if (TARGET_SHMEDIA_FPU)
782 target_flags |= MASK_FMOVD;
783 if (TARGET_SHMEDIA)
785 /* There are no delay slots on SHmedia. */
786 flag_delayed_branch = 0;
787 /* Relaxation isn't yet supported for SHmedia */
788 target_flags &= ~MASK_RELAX;
789 /* After reload, if conversion does little good but can cause
790 ICEs:
791 - find_if_block doesn't do anything for SH because we don't
792 have conditional execution patterns. (We use conditional
793 move patterns, which are handled differently, and only
794 before reload).
795 - find_cond_trap doesn't do anything for the SH because we
796 don't have conditional traps.
797 - find_if_case_1 uses redirect_edge_and_branch_force in
798 the only path that does an optimization, and this causes
799 an ICE when branch targets are in registers.
800 - find_if_case_2 doesn't do anything for the SHmedia after
801 reload except when it can redirect a tablejump - and
802 that's rather rare. */
803 flag_if_conversion2 = 0;
804 if (! strcmp (sh_div_str, "call"))
805 sh_div_strategy = SH_DIV_CALL;
806 else if (! strcmp (sh_div_str, "call2"))
807 sh_div_strategy = SH_DIV_CALL2;
808 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
809 sh_div_strategy = SH_DIV_FP;
810 else if (! strcmp (sh_div_str, "inv"))
811 sh_div_strategy = SH_DIV_INV;
812 else if (! strcmp (sh_div_str, "inv:minlat"))
813 sh_div_strategy = SH_DIV_INV_MINLAT;
814 else if (! strcmp (sh_div_str, "inv20u"))
815 sh_div_strategy = SH_DIV_INV20U;
816 else if (! strcmp (sh_div_str, "inv20l"))
817 sh_div_strategy = SH_DIV_INV20L;
818 else if (! strcmp (sh_div_str, "inv:call2"))
819 sh_div_strategy = SH_DIV_INV_CALL2;
820 else if (! strcmp (sh_div_str, "inv:call"))
821 sh_div_strategy = SH_DIV_INV_CALL;
822 else if (! strcmp (sh_div_str, "inv:fp"))
824 if (TARGET_FPU_ANY)
825 sh_div_strategy = SH_DIV_INV_FP;
826 else
827 sh_div_strategy = SH_DIV_INV;
829 TARGET_CBRANCHDI4 = 0;
830 /* Assembler CFI isn't yet fully supported for SHmedia. */
831 flag_dwarf2_cfi_asm = 0;
834 else
836 /* Only the sh64-elf assembler fully supports .quad properly. */
837 targetm.asm_out.aligned_op.di = NULL;
838 targetm.asm_out.unaligned_op.di = NULL;
840 if (TARGET_SH1)
842 if (! strcmp (sh_div_str, "call-div1"))
843 sh_div_strategy = SH_DIV_CALL_DIV1;
844 else if (! strcmp (sh_div_str, "call-fp")
845 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
846 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
847 sh_div_strategy = SH_DIV_CALL_FP;
848 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
849 sh_div_strategy = SH_DIV_CALL_TABLE;
850 else
851 /* Pick one that makes the most sense for the target in general.
852 It is not of much use to choose different functions depending
853 on -Os, since then we'll end up with two different functions
854 when some of the code is compiled for size and some for
855 speed. */
857 /* SH4 tends to emphasize speed. */
858 if (TARGET_HARD_SH4)
859 sh_div_strategy = SH_DIV_CALL_TABLE;
860 /* These have their own way of doing things. */
861 else if (TARGET_SH2A)
862 sh_div_strategy = SH_DIV_INTRINSIC;
863 /* ??? Should we use the integer SHmedia function instead? */
864 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
865 sh_div_strategy = SH_DIV_CALL_FP;
866 /* SH1 .. SH3 cores often go into small-footprint systems, so
867 default to the smallest implementation available. */
868 else
869 sh_div_strategy = SH_DIV_CALL_DIV1;
871 if (!TARGET_SH1)
872 TARGET_PRETEND_CMOVE = 0;
873 if (sh_divsi3_libfunc[0])
874 ; /* User supplied - leave it alone. */
875 else if (TARGET_DIVIDE_CALL_FP)
876 sh_divsi3_libfunc = "__sdivsi3_i4";
877 else if (TARGET_DIVIDE_CALL_TABLE)
878 sh_divsi3_libfunc = "__sdivsi3_i4i";
879 else if (TARGET_SH5)
880 sh_divsi3_libfunc = "__sdivsi3_1";
881 else
882 sh_divsi3_libfunc = "__sdivsi3";
883 if (sh_branch_cost == -1)
885 sh_branch_cost = 1;
887 /* The SH1 does not have delay slots, hence we get a pipeline stall
888 at every branch. The SH4 is superscalar, so the single delay slot
889 is not sufficient to keep both pipelines filled. */
890 if (! TARGET_SH2 || TARGET_HARD_SH4)
891 sh_branch_cost = 2;
894 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
895 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
896 TARGET_ZDCBRANCH = 1;
898 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
899 if (! VALID_REGISTER_P (regno))
900 sh_register_names[regno][0] = '\0';
902 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
903 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
904 sh_additional_register_names[regno][0] = '\0';
906 if ((flag_pic && ! TARGET_PREFERGOT)
907 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
908 flag_no_function_cse = 1;
910 if (targetm.small_register_classes_for_mode_p (VOIDmode))
912 /* Never run scheduling before reload, since that can
913 break global alloc, and generates slower code anyway due
914 to the pressure on R0. */
915 /* Enable sched1 for SH4 if the user explicitly requests it.
916 When sched1 is enabled, the ready queue will be reordered by
917 the target hooks if pressure is high. We cannot do this for
918 PIC, SH3 and lower, as they give spill failures for R0. */
919 if (!TARGET_HARD_SH4 || flag_pic)
920 flag_schedule_insns = 0;
921 /* ??? Current exception handling places basic block boundaries
922 after call_insns. This causes high pressure on R0 and gives
923 spill failures for R0 in reload. See PR 22553 and the thread
924 on gcc-patches
925 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
926 else if (flag_exceptions)
928 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
929 warning (0, "ignoring -fschedule-insns because of exception "
930 "handling bug");
931 flag_schedule_insns = 0;
933 else if (flag_schedule_insns
934 && !global_options_set.x_flag_schedule_insns)
935 flag_schedule_insns = 0;
938 /* Unwind info is not correct around the CFG unless either a frame
939 pointer is present or M_A_O_A is set. Fixing this requires rewriting
940 unwind info generation to be aware of the CFG and propagating states
941 around edges. */
942 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
943 || flag_exceptions || flag_non_call_exceptions)
944 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
946 warning (0, "unwind tables currently require either a frame pointer "
947 "or -maccumulate-outgoing-args for correctness");
948 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
951 /* Unwinding with -freorder-blocks-and-partition does not work on this
952 architecture, because it requires far jumps to labels across the
953 hot/cold section boundary, which are rejected on this architecture. */
954 if (flag_reorder_blocks_and_partition)
956 if (flag_exceptions)
958 inform (input_location,
959 "-freorder-blocks-and-partition does not work with "
960 "exceptions on this architecture");
961 flag_reorder_blocks_and_partition = 0;
962 flag_reorder_blocks = 1;
964 else if (flag_unwind_tables)
966 inform (input_location,
967 "-freorder-blocks-and-partition does not support unwind "
968 "info on this architecture");
969 flag_reorder_blocks_and_partition = 0;
970 flag_reorder_blocks = 1;
974 /* Adjust loop, jump and function alignment values (in bytes), if those
975 were not specified by the user using -falign-loops, -falign-jumps
976 and -falign-functions options.
977 32 bit alignment is better for speed, because instructions can be
978 fetched as a pair from a longword boundary. For size use 16 bit
979 alignment to get more compact code.
980 Aligning all jumps increases the code size, even if it might
981 result in slightly faster code. Thus, it is set to the smallest
982 alignment possible if not specified by the user. */
983 if (align_loops == 0)
985 if (TARGET_SH5)
986 align_loops = 8;
987 else
988 align_loops = optimize_size ? 2 : 4;
991 if (align_jumps == 0)
993 if (TARGET_SHMEDIA)
994 align_jumps = 1 << CACHE_LOG;
995 else
996 align_jumps = 2;
998 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
999 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1001 if (align_functions == 0)
1003 if (TARGET_SHMEDIA)
1004 align_functions = optimize_size
1005 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1006 else
1007 align_functions = optimize_size ? 2 : 4;
1010 /* The linker relaxation code breaks when a function contains
1011 alignments that are larger than that at the start of a
1012 compilation unit. */
1013 if (TARGET_RELAX)
1015 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1017 /* Also take possible .long constants / mova tables into account. */
1018 if (min_align < 4)
1019 min_align = 4;
1020 if (align_functions < min_align)
1021 align_functions = min_align;
1024 if (flag_unsafe_math_optimizations)
1026 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1027 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1028 TARGET_FSCA = 1;
1030 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1031 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1032 TARGET_FSRRA = 1;
1035 /* Allow fsrra insn only if -funsafe-math-optimizations and
1036 -ffinite-math-only are enabled. */
1037 TARGET_FSRRA = TARGET_FSRRA
1038 && flag_unsafe_math_optimizations
1039 && flag_finite_math_only;
1041 /* If the -mieee option was not explicitly set by the user, turn it on
1042 unless -ffinite-math-only was specified. See also PR 33135. */
1043 if (! global_options_set.x_TARGET_IEEE)
1044 TARGET_IEEE = ! flag_finite_math_only;
1046 if (sh_fixed_range_str)
1047 sh_fix_range (sh_fixed_range_str);
1049 /* This target defaults to strict volatile bitfields. */
1050 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1051 flag_strict_volatile_bitfields = 1;
1053 /* Parse atomic model option and make sure it is valid for the current
1054 target CPU. */
1055 selected_atomic_model_
1056 = parse_validate_atomic_model_option (sh_atomic_model_str);
1058 register_sh_passes ();
1061 /* Print the operand address in x to the stream. */
1062 static void
1063 sh_print_operand_address (FILE *stream, rtx x)
1065 switch (GET_CODE (x))
1067 case REG:
1068 case SUBREG:
1069 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1070 break;
1072 case PLUS:
1074 rtx base = XEXP (x, 0);
1075 rtx index = XEXP (x, 1);
1077 switch (GET_CODE (index))
1079 case CONST_INT:
1080 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1081 reg_names[true_regnum (base)]);
1082 break;
1084 case REG:
1085 case SUBREG:
1087 int base_num = true_regnum (base);
1088 int index_num = true_regnum (index);
1090 fprintf (stream, "@(r0,%s)",
1091 reg_names[MAX (base_num, index_num)]);
1092 break;
1095 default:
1096 gcc_unreachable ();
1099 break;
1101 case PRE_DEC:
1102 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1103 break;
1105 case POST_INC:
1106 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1107 break;
1109 default:
1110 x = mark_constant_pool_use (x);
1111 output_addr_const (stream, x);
1112 break;
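/* The syntaxes emitted above are, for example:
     (reg R4)                      -> @r4
     (plus (reg R4) (const_int 8)) -> @(8,r4)
     (plus (reg R4) (reg R0))      -> @(r0,r4)
     (pre_dec (reg R15))           -> @-r15
     (post_inc (reg R15))          -> @r15+
   and any other address is printed as a constant.  */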
1116 /* Print operand x (an rtx) in assembler syntax to file stream
1117 according to modifier code.
1119 '.' print a .s if insn needs delay slot
1120 ',' print LOCAL_LABEL_PREFIX
1121 '@' print trapa, rte or rts depending upon the interrupt-handler attributes
1122 '#' output a nop if there is nothing to put in the delay slot
1123 ''' print likelihood suffix (/u for unlikely).
1124 '>' print branch target if -fverbose-asm
1125 'O' print a constant without the #
1126 'R' print the LSW of a dp value - changes if in little endian
1127 'S' print the MSW of a dp value - changes if in little endian
1128 'T' print the next word of a dp value - same as 'R' in big endian mode.
1129 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1130 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1131 'N' print 'r63' if the operand is (const_int 0).
1132 'd' print a V2SF reg as dN instead of fpN.
1133 'm' print a pair `base,offset' or `base,index', for LD and ST.
1134 'U' Likewise for {LD,ST}{HI,LO}.
1135 'V' print the position of a single bit set.
1136 'W' print the position of a single bit cleared.
1137 't' print a memory address which is a register.
1138 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1139 'o' output an operator. */
1140 static void
1141 sh_print_operand (FILE *stream, rtx x, int code)
1143 int regno;
1144 enum machine_mode mode;
1146 switch (code)
1148 tree trapa_attr;
1150 case '.':
1151 if (final_sequence
1152 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1153 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1154 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1155 break;
1156 case ',':
1157 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1158 break;
1159 case '@':
1160 trapa_attr = lookup_attribute ("trap_exit",
1161 DECL_ATTRIBUTES (current_function_decl));
1162 if (trapa_attr)
1163 fprintf (stream, "trapa #%ld",
1164 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1165 else if (sh_cfun_interrupt_handler_p ())
1167 if (sh_cfun_resbank_handler_p ())
1168 fprintf (stream, "resbank\n");
1169 fprintf (stream, "rte");
1171 else
1172 fprintf (stream, "rts");
1173 break;
1174 case '#':
1175 /* Output a nop if there's nothing in the delay slot. */
1176 if (dbr_sequence_length () == 0)
1177 fprintf (stream, "\n\tnop");
1178 break;
1179 case '\'':
1181 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1183 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1184 fputs ("/u", stream);
1185 break;
1187 case '>':
1188 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1190 fputs ("\t! target: ", stream);
1191 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1193 break;
1194 case 'O':
1195 x = mark_constant_pool_use (x);
1196 output_addr_const (stream, x);
1197 break;
1198 /* N.B.: %R / %S / %T adjust memory addresses by four.
1199 For SHMEDIA, that means they can be used to access the first and
1200 second 32 bit part of a 64 bit (or larger) value that
1201 might be held in floating point registers or memory.
1202 While they can be used to access 64 bit parts of a larger value
1203 held in general purpose registers, that won't work with memory -
1204 neither for fp registers, since the frxx names are used. */
1205 case 'R':
1206 if (REG_P (x) || GET_CODE (x) == SUBREG)
1208 regno = true_regnum (x);
1209 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1210 fputs (reg_names[regno], (stream));
1212 else if (MEM_P (x))
1214 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1215 sh_print_operand_address (stream, XEXP (x, 0));
1217 else
1219 rtx sub = NULL_RTX;
1221 mode = GET_MODE (x);
1222 if (mode == VOIDmode)
1223 mode = DImode;
1224 if (GET_MODE_SIZE (mode) >= 8)
1225 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1226 if (sub)
1227 sh_print_operand (stream, sub, 0);
1228 else
1229 output_operand_lossage ("invalid operand to %%R");
1231 break;
1232 case 'S':
1233 if (REG_P (x) || GET_CODE (x) == SUBREG)
1235 regno = true_regnum (x);
1236 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1237 fputs (reg_names[regno], (stream));
1239 else if (MEM_P (x))
1241 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1242 sh_print_operand_address (stream, XEXP (x, 0));
1244 else
1246 rtx sub = NULL_RTX;
1248 mode = GET_MODE (x);
1249 if (mode == VOIDmode)
1250 mode = DImode;
1251 if (GET_MODE_SIZE (mode) >= 8)
1252 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1253 if (sub)
1254 sh_print_operand (stream, sub, 0);
1255 else
1256 output_operand_lossage ("invalid operand to %%S");
1258 break;
1259 case 'T':
1260 /* Next word of a double. */
1261 switch (GET_CODE (x))
1263 case REG:
1264 fputs (reg_names[REGNO (x) + 1], (stream));
1265 break;
1266 case MEM:
1267 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1268 && GET_CODE (XEXP (x, 0)) != POST_INC)
1269 x = adjust_address (x, SImode, 4);
1270 sh_print_operand_address (stream, XEXP (x, 0));
1271 break;
1272 default:
1273 break;
1275 break;
1277 case 't':
1278 gcc_assert (MEM_P (x));
1279 x = XEXP (x, 0);
1280 switch (GET_CODE (x))
1282 case REG:
1283 case SUBREG:
1284 sh_print_operand (stream, x, 0);
1285 break;
1286 default:
1287 break;
1289 break;
1291 case 'o':
1292 switch (GET_CODE (x))
1294 case PLUS: fputs ("add", stream); break;
1295 case MINUS: fputs ("sub", stream); break;
1296 case MULT: fputs ("mul", stream); break;
1297 case DIV: fputs ("div", stream); break;
1298 case EQ: fputs ("eq", stream); break;
1299 case NE: fputs ("ne", stream); break;
1300 case GT: case LT: fputs ("gt", stream); break;
1301 case GE: case LE: fputs ("ge", stream); break;
1302 case GTU: case LTU: fputs ("gtu", stream); break;
1303 case GEU: case LEU: fputs ("geu", stream); break;
1304 default:
1305 break;
1307 break;
1308 case 'M':
1309 if (TARGET_SHMEDIA)
1311 if (MEM_P (x)
1312 && GET_CODE (XEXP (x, 0)) == PLUS
1313 && (REG_P (XEXP (XEXP (x, 0), 1))
1314 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1315 fputc ('x', stream);
1317 else
1319 if (MEM_P (x))
1321 switch (GET_MODE (x))
1323 case QImode: fputs (".b", stream); break;
1324 case HImode: fputs (".w", stream); break;
1325 case SImode: fputs (".l", stream); break;
1326 case SFmode: fputs (".s", stream); break;
1327 case DFmode: fputs (".d", stream); break;
1328 default: gcc_unreachable ();
1332 break;
1334 case 'm':
1335 gcc_assert (MEM_P (x));
1336 x = XEXP (x, 0);
1337 /* Fall through. */
1338 case 'U':
1339 switch (GET_CODE (x))
1341 case REG:
1342 case SUBREG:
1343 sh_print_operand (stream, x, 0);
1344 fputs (", 0", stream);
1345 break;
1347 case PLUS:
1348 sh_print_operand (stream, XEXP (x, 0), 0);
1349 fputs (", ", stream);
1350 sh_print_operand (stream, XEXP (x, 1), 0);
1351 break;
1353 default:
1354 gcc_unreachable ();
1356 break;
1358 case 'V':
1360 int num = exact_log2 (INTVAL (x));
1361 gcc_assert (num >= 0);
1362 fprintf (stream, "#%d", num);
1364 break;
1366 case 'W':
1368 int num = exact_log2 (~INTVAL (x));
1369 gcc_assert (num >= 0);
1370 fprintf (stream, "#%d", num);
1372 break;
1374 case 'd':
1375 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1377 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1378 break;
1380 case 'N':
1381 if (x == CONST0_RTX (GET_MODE (x)))
1383 fprintf ((stream), "r63");
1384 break;
1386 goto default_output;
1387 case 'u':
1388 if (CONST_INT_P (x))
1390 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1391 break;
1393 /* Fall through. */
1395 default_output:
1396 default:
1397 regno = 0;
1398 mode = GET_MODE (x);
1400 switch (GET_CODE (x))
1402 case TRUNCATE:
1404 rtx inner = XEXP (x, 0);
1405 int offset = 0;
1406 enum machine_mode inner_mode;
1408 /* We might see SUBREGs with vector mode registers inside. */
1409 if (GET_CODE (inner) == SUBREG
1410 && (GET_MODE_SIZE (GET_MODE (inner))
1411 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1412 && subreg_lowpart_p (inner))
1413 inner = SUBREG_REG (inner);
1414 if (CONST_INT_P (inner))
1416 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1417 goto default_output;
1419 inner_mode = GET_MODE (inner);
1420 if (GET_CODE (inner) == SUBREG
1421 && (GET_MODE_SIZE (GET_MODE (inner))
1422 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1423 && REG_P (SUBREG_REG (inner)))
1425 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1426 GET_MODE (SUBREG_REG (inner)),
1427 SUBREG_BYTE (inner),
1428 GET_MODE (inner));
1429 inner = SUBREG_REG (inner);
1431 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1432 abort ();
1433 /* Floating point register pairs are always big endian;
1434 general purpose registers are 64 bit wide. */
1435 regno = REGNO (inner);
1436 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1437 - HARD_REGNO_NREGS (regno, mode))
1438 + offset;
1439 x = inner;
1440 goto reg;
1442 case SIGN_EXTEND:
1443 x = XEXP (x, 0);
1444 goto reg;
1445 /* FIXME: We need this on SHmedia32 because reload generates
1446 some sign-extended HI or QI loads into DImode registers
1447 but, because Pmode is SImode, the address ends up with a
1448 subreg:SI of the DImode register. Maybe reload should be
1449 fixed so as to apply alter_subreg to such loads? */
1450 case IF_THEN_ELSE:
1451 gcc_assert (trapping_target_operand (x, VOIDmode));
1452 x = XEXP (XEXP (x, 2), 0);
1453 goto default_output;
1454 case SUBREG:
1455 gcc_assert (SUBREG_BYTE (x) == 0
1456 && REG_P (SUBREG_REG (x)));
1458 x = SUBREG_REG (x);
1459 /* Fall through. */
1461 reg:
1462 case REG:
1463 regno += REGNO (x);
1464 if (FP_REGISTER_P (regno)
1465 && mode == V16SFmode)
1466 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1467 else if (FP_REGISTER_P (REGNO (x))
1468 && mode == V4SFmode)
1469 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1470 else if (REG_P (x)
1471 && mode == V2SFmode)
1472 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1473 else if (FP_REGISTER_P (REGNO (x))
1474 && GET_MODE_SIZE (mode) > 4)
1475 fprintf ((stream), "d%s", reg_names[regno] + 1);
1476 else
1477 fputs (reg_names[regno], (stream));
1478 break;
1480 case MEM:
1481 output_address (XEXP (x, 0));
1482 break;
1484 default:
1485 if (TARGET_SH1)
1486 fputc ('#', stream);
1487 output_addr_const (stream, x);
1488 break;
1490 break;
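/* For instance, with a DImode value in a register pair, %R1 and %S1 in an
   insn template print the registers holding the least and most significant
   words respectively (the choice follows SH_REG_LSW_OFFSET / SH_REG_MSW_OFFSET,
   i.e. the target endianness), and %u applied to (const_int 0x12345) prints
   9029 (= 0x2345).  */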
1494 static bool
1495 sh_print_operand_punct_valid_p (unsigned char code)
1497 return (code == '.' || code == '#' || code == '@' || code == ','
1498 || code == '$' || code == '\'' || code == '>');
1501 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1502 static bool
1503 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1505 if (GET_CODE (x) == UNSPEC)
1507 switch (XINT (x, 1))
1509 case UNSPEC_DATALABEL:
1510 fputs ("datalabel ", file);
1511 output_addr_const (file, XVECEXP (x, 0, 0));
1512 break;
1513 case UNSPEC_PIC:
1514 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1515 output_addr_const (file, XVECEXP (x, 0, 0));
1516 break;
1517 case UNSPEC_GOT:
1518 output_addr_const (file, XVECEXP (x, 0, 0));
1519 fputs ("@GOT", file);
1520 break;
1521 case UNSPEC_GOTOFF:
1522 output_addr_const (file, XVECEXP (x, 0, 0));
1523 fputs ("@GOTOFF", file);
1524 break;
1525 case UNSPEC_PLT:
1526 output_addr_const (file, XVECEXP (x, 0, 0));
1527 fputs ("@PLT", file);
1528 break;
1529 case UNSPEC_GOTPLT:
1530 output_addr_const (file, XVECEXP (x, 0, 0));
1531 fputs ("@GOTPLT", file);
1532 break;
1533 case UNSPEC_DTPOFF:
1534 output_addr_const (file, XVECEXP (x, 0, 0));
1535 fputs ("@DTPOFF", file);
1536 break;
1537 case UNSPEC_GOTTPOFF:
1538 output_addr_const (file, XVECEXP (x, 0, 0));
1539 fputs ("@GOTTPOFF", file);
1540 break;
1541 case UNSPEC_TPOFF:
1542 output_addr_const (file, XVECEXP (x, 0, 0));
1543 fputs ("@TPOFF", file);
1544 break;
1545 case UNSPEC_CALLER:
1547 char name[32];
1548 /* LPCS stands for Label for PIC Call Site. */
1549 targetm.asm_out.generate_internal_label (name, "LPCS",
1550 INTVAL (XVECEXP (x, 0, 0)));
1551 assemble_name (file, name);
1553 break;
1554 case UNSPEC_EXTRACT_S16:
1555 case UNSPEC_EXTRACT_U16:
1557 rtx val, shift;
1559 val = XVECEXP (x, 0, 0);
1560 shift = XVECEXP (x, 0, 1);
1561 fputc ('(', file);
1562 if (shift != const0_rtx)
1563 fputc ('(', file);
1564 if (GET_CODE (val) == CONST
1565 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1567 fputc ('(', file);
1568 output_addr_const (file, val);
1569 fputc (')', file);
1571 else
1572 output_addr_const (file, val);
1573 if (shift != const0_rtx)
1575 fputs (" >> ", file);
1576 output_addr_const (file, shift);
1577 fputc (')', file);
1579 fputs (" & 65535)", file);
1581 break;
1582 case UNSPEC_SYMOFF:
1583 output_addr_const (file, XVECEXP (x, 0, 0));
1584 fputc ('-', file);
1585 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1587 fputc ('(', file);
1588 output_addr_const (file, XVECEXP (x, 0, 1));
1589 fputc (')', file);
1591 else
1592 output_addr_const (file, XVECEXP (x, 0, 1));
1593 break;
1594 case UNSPEC_PCREL_SYMOFF:
1595 output_addr_const (file, XVECEXP (x, 0, 0));
1596 fputs ("-(", file);
1597 output_addr_const (file, XVECEXP (x, 0, 1));
1598 fputs ("-.)", file);
1599 break;
1600 default:
1601 return false;
1603 return true;
1605 else
1606 return false;
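/* E.g. an UNSPEC_GOT wrapping the symbol_ref "foo" is printed as "foo@GOT",
   and UNSPEC_PCREL_SYMOFF with operands (sym, lab) comes out as
   "sym-(lab-.)".  */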
1609 /* Encode symbol attributes of a SYMBOL_REF into its
1610 SYMBOL_REF_FLAGS. */
1611 static void
1612 sh_encode_section_info (tree decl, rtx rtl, int first)
1614 default_encode_section_info (decl, rtl, first);
1616 if (TREE_CODE (decl) == FUNCTION_DECL
1617 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1618 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1621 /* Prepare operands for a move define_expand; specifically, one of the
1622 operands must be in a register. */
1623 void
1624 prepare_move_operands (rtx operands[], enum machine_mode mode)
1626 if ((mode == SImode || mode == DImode)
1627 && flag_pic
1628 && ! ((mode == Pmode || mode == ptr_mode)
1629 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1631 rtx temp;
1632 if (SYMBOLIC_CONST_P (operands[1]))
1634 if (MEM_P (operands[0]))
1635 operands[1] = force_reg (Pmode, operands[1]);
1636 else if (TARGET_SHMEDIA
1637 && GET_CODE (operands[1]) == LABEL_REF
1638 && target_reg_operand (operands[0], mode))
1639 /* It's ok. */;
1640 else
1642 temp = (!can_create_pseudo_p ()
1643 ? operands[0]
1644 : gen_reg_rtx (Pmode));
1645 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1648 else if (GET_CODE (operands[1]) == CONST
1649 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1650 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1652 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1653 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1654 mode, temp);
1655 operands[1] = expand_binop (mode, add_optab, temp,
1656 XEXP (XEXP (operands[1], 0), 1),
1657 (!can_create_pseudo_p ()
1658 ? temp
1659 : gen_reg_rtx (Pmode)),
1660 0, OPTAB_LIB_WIDEN);
1664 if (! reload_in_progress && ! reload_completed)
1666 /* Copy the source to a register if both operands aren't registers. */
1667 if (! register_operand (operands[0], mode)
1668 && ! sh_register_operand (operands[1], mode))
1669 operands[1] = copy_to_mode_reg (mode, operands[1]);
1671 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1673 /* This is like change_address_1 (operands[0], mode, 0, 1),
1674 except that we can't use that function because it is static. */
1675 rtx new_rtx = change_address (operands[0], mode, 0);
1676 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1677 operands[0] = new_rtx;
1680 /* This case can happen while generating code to move the result
1681 of a library call to the target. Reject `st r0,@(rX,rY)' because
1682 reload will fail to find a spill register for rX, since r0 is already
1683 being used for the source. */
1684 else if (TARGET_SH1
1685 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1686 && MEM_P (operands[0])
1687 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1688 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1689 operands[1] = copy_to_mode_reg (mode, operands[1]);
1692 if (mode == Pmode || mode == ptr_mode)
1694 rtx op0, op1, opc;
1695 enum tls_model tls_kind;
1697 op0 = operands[0];
1698 op1 = operands[1];
1699 if (GET_CODE (op1) == CONST
1700 && GET_CODE (XEXP (op1, 0)) == PLUS
1701 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1702 != TLS_MODEL_NONE))
1704 opc = XEXP (XEXP (op1, 0), 1);
1705 op1 = XEXP (XEXP (op1, 0), 0);
1707 else
1708 opc = NULL_RTX;
1710 if ((tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1712 rtx tga_op1, tga_ret, tmp, tmp2;
1714 if (! flag_pic
1715 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1716 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1717 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1719 /* Don't schedule insns for getting GOT address when
1720 the first scheduling pass is enabled, to avoid spill
1721 failures for R0. */
1722 if (flag_schedule_insns)
1723 emit_insn (gen_blockage ());
1724 emit_insn (gen_GOTaddr2picreg ());
1725 emit_use (gen_rtx_REG (SImode, PIC_REG));
1726 if (flag_schedule_insns)
1727 emit_insn (gen_blockage ());
1730 switch (tls_kind)
1732 case TLS_MODEL_GLOBAL_DYNAMIC:
1733 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1734 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1735 tmp = gen_reg_rtx (Pmode);
1736 emit_move_insn (tmp, tga_ret);
1737 op1 = tmp;
1738 break;
1740 case TLS_MODEL_LOCAL_DYNAMIC:
1741 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1742 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1744 tmp = gen_reg_rtx (Pmode);
1745 emit_move_insn (tmp, tga_ret);
1747 if (register_operand (op0, Pmode))
1748 tmp2 = op0;
1749 else
1750 tmp2 = gen_reg_rtx (Pmode);
1752 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1753 op1 = tmp2;
1754 break;
1756 case TLS_MODEL_INITIAL_EXEC:
1757 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1758 tmp = gen_sym2GOTTPOFF (op1);
1759 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1760 op1 = tga_op1;
1761 break;
1763 case TLS_MODEL_LOCAL_EXEC:
1764 tmp2 = gen_reg_rtx (Pmode);
1765 emit_insn (gen_store_gbr (tmp2));
1766 tmp = gen_reg_rtx (Pmode);
1767 emit_insn (gen_symTPOFF2reg (tmp, op1));
1769 if (register_operand (op0, Pmode))
1770 op1 = op0;
1771 else
1772 op1 = gen_reg_rtx (Pmode);
1774 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1775 break;
1777 default:
1778 gcc_unreachable ();
1780 if (opc)
1781 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1782 operands[1] = op1;
1787 /* Implement the canonicalize_comparison target hook for the combine
1788 pass. For the target hook this function is invoked via
1789 sh_canonicalize_comparison. This function is also re-used to
1790 canonicalize comparisons in cbranch pattern expanders. */
1791 static void
1792 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1793 enum machine_mode mode,
1794 bool op0_preserve_value)
1796 /* When invoked from within the combine pass the mode is not specified,
1797 so try to get it from one of the operands. */
1798 if (mode == VOIDmode)
1799 mode = GET_MODE (op0);
1800 if (mode == VOIDmode)
1801 mode = GET_MODE (op1);
1803 // We need to have a mode to do something useful here.
1804 if (mode == VOIDmode)
1805 return;
1807 // Currently, we don't deal with floats here.
1808 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1809 return;
1811 // Make sure that the constant operand is the second operand.
1812 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1814 if (op0_preserve_value)
1815 return;
1817 std::swap (op0, op1);
1818 cmp = swap_condition (cmp);
1821 if (CONST_INT_P (op1))
1823 /* Try to adjust the constant operand in such a way that available
1824 comparison insns can be utilized better and the constant can be
1825 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1826 constant pool. */
1827 const HOST_WIDE_INT val = INTVAL (op1);
1829 /* x > -1 --> x >= 0
1830 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1831 x <= -1 --> x < 0
1832 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1833 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1835 cmp = cmp == GT ? GE : LT;
1836 op1 = gen_int_mode (val + 1, mode);
1839 /* x >= 1 --> x > 0
1840 x >= 0x80 --> x > 0x7F
1841 x < 1 --> x <= 0
1842 x < 0x80 --> x <= 0x7F */
1843 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1845 cmp = cmp == GE ? GT : LE;
1846 op1 = gen_int_mode (val - 1, mode);
1849 /* unsigned x >= 1 --> x != 0
1850 unsigned x < 1 --> x == 0 */
1851 else if (val == 1 && (cmp == GEU || cmp == LTU))
1853 cmp = cmp == GEU ? NE : EQ;
1854 op1 = CONST0_RTX (mode);
1857 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1858 unsigned x < 0x80 --> unsigned x < 0x7F */
1859 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1861 cmp = cmp == GEU ? GTU : LEU;
1862 op1 = gen_int_mode (val - 1, mode);
1865 /* unsigned x > 0 --> x != 0
1866 unsigned x <= 0 --> x == 0 */
1867 else if (val == 0 && (cmp == GTU || cmp == LEU))
1868 cmp = cmp == GTU ? NE : EQ;
1870 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1871 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1872 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1873 && val == 0x7FFFFFFF)
1875 cmp = cmp == GTU ? LT : GE;
1876 op1 = const0_rtx;
1879 /* unsigned x >= 0x80000000 --> signed x < 0
1880 unsigned x < 0x80000000 --> signed x >= 0 */
1881 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1882 && (unsigned HOST_WIDE_INT)val
1883 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1885 cmp = cmp == GEU ? LT : GE;
1886 op1 = const0_rtx;
1891 /* This function implements the canonicalize_comparison target hook.
1892 This wrapper around the internally used sh_canonicalize_comparison
1893 function is needed to do the enum rtx_code <-> int conversion.
1894 Target hooks cannot use enum rtx_code in its definition. */
1895 static void
1896 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1897 bool op0_preserve_value)
1899 enum rtx_code tmp_code = (enum rtx_code)*code;
1900 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1901 VOIDmode, op0_preserve_value);
1902 *code = (int)tmp_code;
1905 bool
1906 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1908 *p1 = T_REG;
1909 *p2 = INVALID_REGNUM;
1910 return true;
1913 enum rtx_code
1914 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1915 enum rtx_code comparison)
1917 /* The scratch reg is only available when this is invoked from within
1918 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1919 rtx scratch = NULL_RTX;
1921 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1922 comparison = GET_CODE (operands[0]);
1923 else
1924 scratch = operands[4];
1926 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1927 mode, false);
1929 /* Notice that this function is also invoked after reload by
1930 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1931 rtx op1 = operands[1];
1933 if (can_create_pseudo_p ())
1934 operands[1] = force_reg (mode, op1);
1935 /* When we are handling DImode comparisons, we want to keep constants so
1936 that we can optimize the component comparisons; however, memory loads
1937 are better issued as a whole so that they can be scheduled well.
1938 SImode equality comparisons allow I08 constants, but only when they
1939 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1940 into a register, that register might as well be r0, and we allow the
1941 constant. If it is already in a register, this is likely to be
1942 allocated to a different hard register, thus we load the constant into
1943 a register unless it is zero. */
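/* Concretely: the only compare-with-immediate insn on SH1-4 is
   "cmp/eq #imm8,r0", so a constant operands[2] can only stay as-is for an
   EQ/NE comparison whose other operand is (or will become) r0; every other
   case is forced through the scratch reg or a fresh pseudo below.  */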
1944 if (!REG_P (operands[2])
1945 && (!CONST_INT_P (operands[2])
1946 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1947 && ((comparison != EQ && comparison != NE)
1948 || (REG_P (op1) && REGNO (op1) != R0_REG)
1949 || !satisfies_constraint_I08 (operands[2])))))
1951 if (scratch && GET_MODE (scratch) == mode)
1953 emit_move_insn (scratch, operands[2]);
1954 operands[2] = scratch;
1956 else if (can_create_pseudo_p ())
1957 operands[2] = force_reg (mode, operands[2]);
1959 return comparison;
1962 void
1963 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
1965 rtx (*branch_expander) (rtx) = gen_branch_true;
1966 comparison = prepare_cbranch_operands (operands, SImode, comparison);
1967 switch (comparison)
1969 case NE: case LT: case LE: case LTU: case LEU:
1970 comparison = reverse_condition (comparison);
1971 branch_expander = gen_branch_false;
1972 default: ;
1974 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
1975 gen_rtx_fmt_ee (comparison, SImode,
1976 operands[1], operands[2])));
1977 rtx jump = emit_jump_insn (branch_expander (operands[3]));
1978 if (probability >= 0)
1979 add_int_reg_note (jump, REG_BR_PROB, probability);
1982 /* ??? How should we distribute probabilities when more than one branch
1983 is generated? So far we only have some ad-hoc observations:
1984 - If the operands are random, they are likely to differ in both parts.
1985 - If comparing items in a hash chain, the operands are random or equal;
1986 operation should be EQ or NE.
1987 - If items are searched in an ordered tree from the root, we can expect
1988 the highpart to be unequal about half of the time; operation should be
1989 an inequality comparison, operands non-constant, and overall probability
1990 about 50%. Likewise for quicksort.
1991 - Range checks will often be made against constants. Even if we assume for
1992 simplicity an even distribution of the non-constant operand over a
1993 sub-range here, the same probability could be generated with differently
1994 wide sub-ranges - as long as the ratio of the part of the subrange that
1995 is before the threshold to the part that comes after the threshold stays
1996 the same. Thus, we can't really tell anything here;
1997 assuming random distribution is at least simple.
1999 bool
2000 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2002 enum rtx_code msw_taken, msw_skip, lsw_taken;
2003 rtx skip_label = NULL_RTX;
2004 rtx op1h, op1l, op2h, op2l;
2005 int num_branches;
2006 int prob, rev_prob;
2007 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2008 rtx scratch = operands[4];
2010 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2011 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2012 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2013 op1l = gen_lowpart (SImode, operands[1]);
2014 op2l = gen_lowpart (SImode, operands[2]);
2015 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2016 prob = split_branch_probability;
2017 rev_prob = REG_BR_PROB_BASE - prob;
2018 switch (comparison)
2020 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2021 That costs 1 cycle more when the first branch can be predicted taken,
2022 but saves us mispredicts because only one branch needs prediction.
2023 It also enables generating the cmpeqdi_t-1 pattern. */
2024 case EQ:
2025 if (TARGET_CMPEQDI_T)
2027 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2028 emit_jump_insn (gen_branch_true (operands[3]));
2029 return true;
2031 msw_skip = NE;
2032 lsw_taken = EQ;
2033 if (prob >= 0)
2035 // If we had more precision, we'd use rev_prob - (rev_prob >> 32).
2036 msw_skip_prob = rev_prob;
2037 if (REG_BR_PROB_BASE <= 65535)
2038 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2039 else
2041 gcc_assert (HOST_BITS_PER_WIDEST_INT >= 64);
2042 lsw_taken_prob
2043 = (prob
2044 ? (REG_BR_PROB_BASE
2045 - ((HOST_WIDEST_INT) REG_BR_PROB_BASE * rev_prob
2046 / ((HOST_WIDEST_INT) prob << 32)))
2047 : 0);
2050 break;
2051 case NE:
2052 if (TARGET_CMPEQDI_T)
2054 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2055 emit_jump_insn (gen_branch_false (operands[3]));
2056 return true;
2058 msw_taken = NE;
2059 msw_taken_prob = prob;
2060 lsw_taken = NE;
2061 lsw_taken_prob = 0;
2062 break;
2063 case GTU: case GT:
2064 msw_taken = comparison;
2065 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2066 break;
2067 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2068 msw_skip = swap_condition (msw_taken);
2069 lsw_taken = GTU;
2070 break;
2071 case GEU: case GE:
2072 if (op2l == CONST0_RTX (SImode))
2073 msw_taken = comparison;
2074 else
2076 msw_taken = comparison == GE ? GT : GTU;
2077 msw_skip = swap_condition (msw_taken);
2078 lsw_taken = GEU;
2080 break;
2081 case LTU: case LT:
2082 msw_taken = comparison;
2083 if (op2l == CONST0_RTX (SImode))
2084 break;
2085 msw_skip = swap_condition (msw_taken);
2086 lsw_taken = LTU;
2087 break;
2088 case LEU: case LE:
2089 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2090 msw_taken = comparison;
2091 else
2093 lsw_taken = LEU;
2094 if (comparison == LE)
2095 msw_taken = LT;
2096 else if (op2h != CONST0_RTX (SImode))
2097 msw_taken = LTU;
2098 else
2100 msw_skip = swap_condition (LTU);
2101 break;
2103 msw_skip = swap_condition (msw_taken);
2105 break;
2106 default: return false;
2108 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2109 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2110 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2111 if (comparison != EQ && comparison != NE && num_branches > 1)
2113 if (!CONSTANT_P (operands[2])
2114 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2115 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2117 msw_taken_prob = prob / 2U;
2118 msw_skip_prob
2119 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2120 lsw_taken_prob = prob;
2122 else
2124 msw_taken_prob = prob;
2125 msw_skip_prob = REG_BR_PROB_BASE;
2126 /* ??? If we have a constant op2h, should we use that when
2127 calculating lsw_taken_prob? */
2128 lsw_taken_prob = prob;
2131 operands[1] = op1h;
2132 operands[2] = op2h;
2133 operands[4] = NULL_RTX;
2134 if (reload_completed
2135 && ! arith_reg_or_0_operand (op2h, SImode)
2136 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2137 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2138 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2140 emit_move_insn (scratch, operands[2]);
2141 operands[2] = scratch;
2143 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2144 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2145 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2147 rtx taken_label = operands[3];
2149 /* Operands were possibly modified, but msw_skip doesn't expect this.
2150 Always use the original ones. */
2151 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2153 operands[1] = op1h;
2154 operands[2] = op2h;
2155 if (reload_completed
2156 && ! arith_reg_or_0_operand (op2h, SImode)
2157 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2159 emit_move_insn (scratch, operands[2]);
2160 operands[2] = scratch;
2164 operands[3] = skip_label = gen_label_rtx ();
2165 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2166 operands[3] = taken_label;
2168 operands[1] = op1l;
2169 operands[2] = op2l;
2170 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2172 if (reload_completed
2173 && ! arith_reg_or_0_operand (op2l, SImode)
2174 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2176 emit_move_insn (scratch, operands[2]);
2177 operands[2] = scratch;
2179 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2181 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2182 emit_label (skip_label);
2183 return true;
2186 /* Given an operand, return 1 if the evaluated operand plugged into an
2187 if_then_else will result in a branch_true, 0 if branch_false, or
2188 -1 if neither applies. The truth table goes like this:
2190 op | cmpval | code | result
2191 ---------+--------+---------+--------------------
2192 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2193 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2194 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2195 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2196 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2197 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2198 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2199 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2201 sh_eval_treg_value (rtx op)
2203 enum rtx_code code = GET_CODE (op);
2204 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2205 return -1;
2207 int cmpop = code == EQ ? 1 : 0;
2208 int cmpval = INTVAL (XEXP (op, 1));
2209 if (cmpval != 0 && cmpval != 1)
2210 return -1;
2212 int t;
2213 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2214 t = 0;
2215 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2216 t = 1;
2217 else
2218 return -1;
2220 return t ^ (cmpval == cmpop);
2223 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2225 static void
2226 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2228 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2230 insn = gen_rtx_PARALLEL (VOIDmode,
2231 gen_rtvec (2, insn,
2232 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2233 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2235 else
2236 emit_insn (insn);
2239 /* Prepare the operands for an scc instruction; make sure that the
2240 compare has been done and the result is in T_REG. */
2241 void
2242 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2244 rtx t_reg = get_t_reg_rtx ();
2245 enum rtx_code oldcode = code;
2246 enum machine_mode mode;
2248 /* First need a compare insn. */
2249 switch (code)
2251 case NE:
2252 /* It isn't possible to handle this case. */
2253 gcc_unreachable ();
2254 case LT:
2255 code = GT;
2256 break;
2257 case LE:
2258 code = GE;
2259 break;
2260 case LTU:
2261 code = GTU;
2262 break;
2263 case LEU:
2264 code = GEU;
2265 break;
2266 default:
2267 break;
2269 if (code != oldcode)
2271 rtx tmp = op0;
2272 op0 = op1;
2273 op1 = tmp;
2276 mode = GET_MODE (op0);
2277 if (mode == VOIDmode)
2278 mode = GET_MODE (op1);
2280 op0 = force_reg (mode, op0);
2281 if ((code != EQ && code != NE
2282 && (op1 != const0_rtx
2283 || code == GTU || code == GEU || code == LTU || code == LEU))
2284 || (mode == DImode && op1 != const0_rtx)
2285 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2286 op1 = force_reg (mode, op1);
2288 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2289 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2290 mode);
2294 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2295 rtx op0, rtx op1)
2297 rtx target = gen_reg_rtx (SImode);
2298 rtx tmp;
2300 gcc_assert (TARGET_SHMEDIA);
2301 switch (code)
2303 case EQ:
2304 case GT:
2305 case LT:
2306 case UNORDERED:
2307 case GTU:
2308 case LTU:
2309 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2310 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2311 code = NE;
2312 break;
2314 case NE:
2315 case GE:
2316 case LE:
2317 case ORDERED:
2318 case GEU:
2319 case LEU:
2320 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2321 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2322 code = EQ;
2323 break;
2325 case UNEQ:
2326 case UNGE:
2327 case UNGT:
2328 case UNLE:
2329 case UNLT:
2330 case LTGT:
2331 return NULL_RTX;
2333 default:
2334 gcc_unreachable ();
2337 if (mode == DImode)
2339 rtx t2 = gen_reg_rtx (DImode);
2340 emit_insn (gen_extendsidi2 (t2, target));
2341 target = t2;
2344 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2347 /* Called from the md file, set up the operands of a compare instruction. */
2348 void
2349 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2351 enum rtx_code code = GET_CODE (operands[0]);
2352 enum rtx_code branch_code;
2353 rtx op0 = operands[1];
2354 rtx op1 = operands[2];
2355 rtx insn, tem;
2356 bool need_ccmpeq = false;
2358 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2360 op0 = force_reg (mode, op0);
2361 op1 = force_reg (mode, op1);
2363 else
2365 if (code != EQ || mode == DImode)
2367 /* Force args into regs, since we can't use constants here. */
2368 op0 = force_reg (mode, op0);
2369 if (op1 != const0_rtx || code == GTU || code == GEU)
2370 op1 = force_reg (mode, op1);
2374 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2376 if (code == LT
2377 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2378 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2380 tem = op0, op0 = op1, op1 = tem;
2381 code = swap_condition (code);
2384 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2385 if (code == GE)
2387 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2388 need_ccmpeq = true;
2389 code = GT;
2392 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2393 to EQ/GT respectively. */
2394 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2397 switch (code)
2399 case EQ:
2400 case GT:
2401 case GE:
2402 case GTU:
2403 case GEU:
2404 branch_code = code;
2405 break;
2406 case NE:
2407 case LT:
2408 case LE:
2409 case LTU:
2410 case LEU:
2411 branch_code = reverse_condition (code);
2412 break;
2413 default:
2414 gcc_unreachable ();
2417 insn = gen_rtx_SET (VOIDmode,
2418 get_t_reg_rtx (),
2419 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2421 sh_emit_set_t_insn (insn, mode);
2422 if (need_ccmpeq)
2423 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2425 if (branch_code == code)
2426 emit_jump_insn (gen_branch_true (operands[3]));
2427 else
2428 emit_jump_insn (gen_branch_false (operands[3]));
2431 void
2432 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2434 enum rtx_code code = GET_CODE (operands[1]);
2435 rtx op0 = operands[2];
2436 rtx op1 = operands[3];
2437 rtx lab = NULL_RTX;
2438 bool invert = false;
2439 rtx tem;
2441 op0 = force_reg (mode, op0);
2442 if ((code != EQ && code != NE
2443 && (op1 != const0_rtx
2444 || code == GTU || code == GEU || code == LTU || code == LEU))
2445 || (mode == DImode && op1 != const0_rtx)
2446 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2447 op1 = force_reg (mode, op1);
2449 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2451 if (code == LT || code == LE)
2453 code = swap_condition (code);
2454 tem = op0, op0 = op1, op1 = tem;
2456 if (code == GE)
2458 if (TARGET_IEEE)
2460 lab = gen_label_rtx ();
2461 sh_emit_scc_to_t (EQ, op0, op1);
2462 emit_jump_insn (gen_branch_true (lab));
2463 code = GT;
2465 else
2467 code = LT;
2468 invert = true;
2473 if (code == NE)
2475 code = EQ;
2476 invert = true;
2479 sh_emit_scc_to_t (code, op0, op1);
2480 if (lab)
2481 emit_label (lab);
2482 if (invert)
2483 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2484 else
2485 emit_move_insn (operands[0], get_t_reg_rtx ());
2488 /* Functions to output assembly code. */
2490 /* Return a sequence of instructions to perform DI or DF move.
2492 Since the SH cannot move a DI or DF in one instruction, we have
2493 to take care when we see overlapping source and dest registers. */
2494 const char *
2495 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2496 enum machine_mode mode)
2498 rtx dst = operands[0];
2499 rtx src = operands[1];
2501 if (MEM_P (dst)
2502 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2503 return "mov.l %T1,%0" "\n"
2504 " mov.l %1,%0";
2506 if (register_operand (dst, mode)
2507 && register_operand (src, mode))
2509 if (REGNO (src) == MACH_REG)
2510 return "sts mach,%S0" "\n"
2511 " sts macl,%R0";
2513 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2514 when mov.d r1,r0 do r1->r0 then r2->r1. */
2515 if (REGNO (src) + 1 == REGNO (dst))
2516 return "mov %T1,%T0" "\n"
2517 " mov %1,%0";
2518 else
2519 return "mov %1,%0" "\n"
2520 " mov %T1,%T0";
2522 else if (CONST_INT_P (src))
2524 if (INTVAL (src) < 0)
2525 output_asm_insn ("mov #-1,%S0", operands);
2526 else
2527 output_asm_insn ("mov #0,%S0", operands);
2529 return "mov %1,%R0";
2531 else if (MEM_P (src))
2533 int ptrreg = -1;
2534 int dreg = REGNO (dst);
2535 rtx inside = XEXP (src, 0);
2537 switch (GET_CODE (inside))
2539 case REG:
2540 ptrreg = REGNO (inside);
2541 break;
2543 case SUBREG:
2544 ptrreg = subreg_regno (inside);
2545 break;
2547 case PLUS:
2548 ptrreg = REGNO (XEXP (inside, 0));
2549 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2550 an offsettable address. Unfortunately, offsettable addresses use
2551 QImode to check the offset, and a QImode offsettable address
2552 requires r0 for the other operand, which is not currently
2553 supported, so we can't use the 'o' constraint.
2554 Thus we must check for and handle r0+REG addresses here.
2555 We punt for now, since this is likely very rare. */
2556 gcc_assert (!REG_P (XEXP (inside, 1)));
2557 break;
2559 case LABEL_REF:
2560 return "mov.l %1,%0" "\n"
2561 " mov.l %1+4,%T0";
2562 case POST_INC:
2563 return "mov.l %1,%0" "\n"
2564 " mov.l %1,%T0";
2565 default:
2566 gcc_unreachable ();
2569 /* Work out the safe way to copy. Copy into the second half first. */
2570 if (dreg == ptrreg)
2571 return "mov.l %T1,%T0" "\n"
2572 " mov.l %1,%0";
2575 return "mov.l %1,%0" "\n"
2576 " mov.l %T1,%T0";
2579 /* Print an instruction which would have gone into a delay slot after
2580 another instruction, but couldn't because the other instruction expanded
2581 into a sequence where putting the slot insn at the end wouldn't work. */
2582 static void
2583 print_slot (rtx insn)
2585 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
2587 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
2590 const char *
2591 output_far_jump (rtx insn, rtx op)
2593 struct { rtx lab, reg, op; } this_jmp;
2594 rtx braf_base_lab = NULL_RTX;
2595 const char *jump;
2596 int far;
2597 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2598 rtx prev;
2600 this_jmp.lab = gen_label_rtx ();
2602 if (TARGET_SH2
2603 && offset >= -32764
2604 && offset - get_attr_length (insn) <= 32766)
2606 far = 0;
2607 jump = "mov.w %O0,%1" "\n"
2608 " braf %1";
2610 else
2612 far = 1;
2613 if (flag_pic)
2615 if (TARGET_SH2)
2616 jump = "mov.l %O0,%1" "\n"
2617 " braf %1";
2618 else
2619 jump = "mov.l r0,@-r15" "\n"
2620 " mova %O0,r0" "\n"
2621 " mov.l @r0,%1" "\n"
2622 " add r0,%1" "\n"
2623 " mov.l @r15+,r0" "\n"
2624 " jmp @%1";
2626 else
2627 jump = "mov.l %O0,%1" "\n"
2628 " jmp @%1";
2630 /* If we have a scratch register available, use it. */
2631 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2632 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2634 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2635 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2636 jump = "mov.l r1,@-r15" "\n"
2637 " mova %O0,r0" "\n"
2638 " mov.l @r0,r1" "\n"
2639 " add r1,r0" "\n"
2640 " mov.l @r15+,r1" "\n"
2641 " jmp @%1";
2642 output_asm_insn (jump, &this_jmp.lab);
2643 if (dbr_sequence_length ())
2644 print_slot (final_sequence);
2645 else
2646 output_asm_insn ("nop", 0);
2648 else
2650 /* Output the delay slot insn first if any. */
2651 if (dbr_sequence_length ())
2652 print_slot (final_sequence);
2654 this_jmp.reg = gen_rtx_REG (SImode, 13);
2655 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2656 Fortunately, MACL is fixed and call-clobbered, and we never
2657 need its value across jumps, so save r13 in it instead of in
2658 the stack. */
2659 if (TARGET_SH5)
2660 output_asm_insn ("lds r13,macl", 0);
2661 else
2662 output_asm_insn ("mov.l r13,@-r15", 0);
2663 output_asm_insn (jump, &this_jmp.lab);
2664 if (TARGET_SH5)
2665 output_asm_insn ("sts macl,r13", 0);
2666 else
2667 output_asm_insn ("mov.l @r15+,r13", 0);
2669 if (far && flag_pic && TARGET_SH2)
2671 braf_base_lab = gen_label_rtx ();
2672 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2673 CODE_LABEL_NUMBER (braf_base_lab));
2675 if (far)
2676 output_asm_insn (".align 2", 0);
2677 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2678 this_jmp.op = op;
2679 if (far && flag_pic)
2681 if (TARGET_SH2)
2682 this_jmp.lab = braf_base_lab;
2683 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2685 else
2686 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2687 return "";
2690 /* Local label counter, used for constants in the pool and inside
2691 pattern branches. */
2692 static int lf = 100;
2694 /* Output code for ordinary branches. */
2695 const char *
2696 output_branch (int logic, rtx insn, rtx *operands)
2698 switch (get_attr_length (insn))
2700 case 6:
2701 /* This can happen if filling the delay slot has caused a forward
2702 branch to exceed its range (we could reverse it, but only
2703 when we know we won't overextend other branches; this should
2704 best be handled by relaxation).
2705 It can also happen when other condbranches hoist delay slot insns
2706 from their destinations, thus leading to a code size increase.
2707 But the branch will still be in the range -4092..+4098 bytes. */
2708 if (! TARGET_RELAX)
2710 int label = lf++;
2711 /* The call to print_slot will clobber the operands. */
2712 rtx op0 = operands[0];
2714 /* If the instruction in the delay slot is annulled (true), then
2715 there is no delay slot where we can put it now. The only safe
2716 place for it is after the label. final will do that by default. */
2718 if (final_sequence
2719 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2720 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2722 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2723 ASSEMBLER_DIALECT ? "/" : ".", label);
2724 print_slot (final_sequence);
2726 else
2727 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2729 output_asm_insn ("bra\t%l0", &op0);
2730 fprintf (asm_out_file, "\tnop\n");
2731 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2733 return "";
2735 /* When relaxing, handle this like a short branch. The linker
2736 will fix it up if it still doesn't fit after relaxation. */
2737 case 2:
2738 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2740 /* These are for SH2e, in which we have to account for the
2741 extra nop because of the hardware bug in annulled branches. */
2742 case 8:
2743 if (! TARGET_RELAX)
2745 int label = lf++;
2747 gcc_assert (!final_sequence
2748 || !(INSN_ANNULLED_BRANCH_P
2749 (XVECEXP (final_sequence, 0, 0))));
2750 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2751 logic ? "f" : "t",
2752 ASSEMBLER_DIALECT ? "/" : ".", label);
2753 fprintf (asm_out_file, "\tnop\n");
2754 output_asm_insn ("bra\t%l0", operands);
2755 fprintf (asm_out_file, "\tnop\n");
2756 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2758 return "";
2760 /* When relaxing, fall through. */
2761 case 4:
2763 char buffer[10];
2765 sprintf (buffer, "b%s%ss\t%%l0",
2766 logic ? "t" : "f",
2767 ASSEMBLER_DIALECT ? "/" : ".");
2768 output_asm_insn (buffer, &operands[0]);
2769 return "nop";
2772 default:
2773 /* There should be no longer branches now - that would
2774 indicate that something has destroyed the branches set
2775 up in machine_dependent_reorg. */
2776 gcc_unreachable ();
2780 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2781 fill in operands[9] as a label to the successor insn.
2782 We try to use jump threading where possible.
2783 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2784 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2785 follow jmp and bt, if the address is in range. */
2786 const char *
2787 output_branchy_insn (enum rtx_code code, const char *templ,
2788 rtx insn, rtx *operands)
2790 rtx next_insn = NEXT_INSN (insn);
2792 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2794 rtx src = SET_SRC (PATTERN (next_insn));
2795 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2797 /* Following branch not taken */
2798 operands[9] = gen_label_rtx ();
2799 emit_label_after (operands[9], next_insn);
2800 INSN_ADDRESSES_NEW (operands[9],
2801 INSN_ADDRESSES (INSN_UID (next_insn))
2802 + get_attr_length (next_insn));
2803 return templ;
2805 else
2807 int offset = (branch_dest (next_insn)
2808 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2809 if (offset >= -252 && offset <= 258)
2811 if (GET_CODE (src) == IF_THEN_ELSE)
2812 /* branch_true */
2813 src = XEXP (src, 1);
2814 operands[9] = src;
2815 return templ;
2819 operands[9] = gen_label_rtx ();
2820 emit_label_after (operands[9], insn);
2821 INSN_ADDRESSES_NEW (operands[9],
2822 INSN_ADDRESSES (INSN_UID (insn))
2823 + get_attr_length (insn));
2824 return templ;
2827 const char *
2828 output_ieee_ccmpeq (rtx insn, rtx *operands)
2830 return output_branchy_insn (NE, "bt %l9" "\n"
2831 " fcmp/eq %1,%0",
2832 insn, operands);
2835 /* Output the start of the assembler file. */
2836 static void
2837 sh_file_start (void)
2839 default_file_start ();
2841 if (TARGET_ELF)
2842 /* We need to show the text section with the proper
2843 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2844 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2845 will complain. We can teach GAS specifically about the
2846 default attributes for our choice of text section, but
2847 then we would have to change GAS again if/when we change
2848 the text section name. */
2849 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2850 else
2851 /* Switch to the data section so that the coffsem symbol
2852 isn't in the text section. */
2853 switch_to_section (data_section);
2855 if (TARGET_LITTLE_ENDIAN)
2856 fputs ("\t.little\n", asm_out_file);
2858 if (!TARGET_ELF)
2860 if (TARGET_SHCOMPACT)
2861 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2862 else if (TARGET_SHMEDIA)
2863 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2864 TARGET_SHMEDIA64 ? 64 : 32);
2868 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2869 static bool
2870 unspec_caller_rtx_p (rtx pat)
2872 rtx base, offset;
2873 int i;
2875 split_const (pat, &base, &offset);
2876 if (GET_CODE (base) == UNSPEC)
2878 if (XINT (base, 1) == UNSPEC_CALLER)
2879 return true;
2880 for (i = 0; i < XVECLEN (base, 0); i++)
2881 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2882 return true;
2884 return false;
2887 /* Indicate that INSN cannot be duplicated. This is true for an insn
2888 that generates a unique label. */
2889 static bool
2890 sh_cannot_copy_insn_p (rtx insn)
2892 rtx pat;
2894 if (!reload_completed || !flag_pic)
2895 return false;
2897 if (!NONJUMP_INSN_P (insn))
2898 return false;
2899 if (asm_noperands (insn) >= 0)
2900 return false;
2902 pat = PATTERN (insn);
2903 if (GET_CODE (pat) != SET)
2904 return false;
2905 pat = SET_SRC (pat);
2907 if (unspec_caller_rtx_p (pat))
2908 return true;
2910 return false;
2913 /* Number of instructions used to make an arithmetic right shift by N. */
2914 static const char ashiftrt_insns[] =
2915 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2917 /* Description of a logical left or right shift, when expanded to a sequence
2918 of 1/2/8/16 shifts.
2919 Notice that one bit right shifts clobber the T bit. One bit left shifts
2920 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
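/* For example, entry 5 of the table below is
   { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }: a shift by 5 is emitted as shifts
   by 2, 1 and 2 (three insns), and since the 1-bit step of a right shift
   is a shlr, a logical right shift by 5 clobbers the T bit.  */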
2921 enum
2923 ASHL_CLOBBERS_T = 1 << 0,
2924 LSHR_CLOBBERS_T = 1 << 1
2927 struct ashl_lshr_sequence
2929 char insn_count;
2930 char amount[6];
2931 char clobbers_t;
2934 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2936 { 0, { 0 }, 0 }, // 0
2937 { 1, { 1 }, LSHR_CLOBBERS_T },
2938 { 1, { 2 }, 0 },
2939 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2940 { 2, { 2, 2 }, 0 }, // 4
2941 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2942 { 3, { 2, 2, 2 }, 0 },
2943 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2944 { 1, { 8 }, 0 }, // 8
2945 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2946 { 2, { 8, 2 }, 0 },
2947 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2948 { 3, { 8, 2, 2 }, 0 }, // 12
2949 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2950 { 3, { 8, -2, 8 }, 0 },
2951 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2952 { 1, { 16 }, 0 }, // 16
2953 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2954 { 2, { 16, 2 }, 0 },
2955 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2956 { 3, { 16, 2, 2 }, 0 }, // 20
2957 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2958 { 3, { 16, -2, 8 }, 0 },
2959 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2960 { 2, { 16, 8 }, 0 }, // 24
2961 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2962 { 3, { 16, 8, 2 }, 0 },
2963 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2964 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2965 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2966 { 3, { 16, -2, 16 }, 0 },
2968 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2969 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2970 However, the shift-and combiner code needs this entry here to be in
2971 terms of real shift insns. */
2972 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2975 /* Individual shift amounts for shift amounts < 16, where up to the three
2976 highmost bits might be clobbered. This is typically used when combined
2977 with some kind of sign or zero extension. */
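/* For example, entry 6 below is { 2, { 8, -2 }, 0 }: a left shift by 6 is
   done as a shift left by 8 followed by a logical shift right by 2.  This
   is one insn shorter than the exact { 2, 2, 2 } sequence, but the two
   highmost bits of the result end up zero, which is fine when a following
   sign or zero extension discards them anyway.  */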
2978 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2980 { 0, { 0 }, 0 }, // 0
2981 { 1, { 1 }, LSHR_CLOBBERS_T },
2982 { 1, { 2 }, 0 },
2983 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2984 { 2, { 2, 2 }, 0 }, // 4
2985 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2986 { 2, { 8, -2 }, 0 },
2987 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2988 { 1, { 8 }, 0 }, // 8
2989 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2990 { 2, { 8, 2 }, 0 },
2991 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2992 { 3, { 8, 2, 2 }, 0 }, // 12
2993 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2994 { 2, { 16, -2 }, 0 },
2995 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2996 { 1, { 16 }, 0 }, // 16
2997 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2998 { 2, { 16, 2 }, 0 },
2999 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3000 { 3, { 16, 2, 2 }, 0 }, // 20
3001 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3002 { 3, { 16, -2, 8 }, 0 },
3003 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3004 { 2, { 16, 8 }, 0 }, // 24
3005 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3006 { 3, { 16, 8, 2 }, 0 },
3007 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3008 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3009 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3010 { 3, { 16, -2, 16 }, 0 },
3011 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3014 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3015 will clobber the T bit. */
3016 bool
3017 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3019 gcc_assert (CONST_INT_P (shift_amount));
3021 const int shift_amount_i = INTVAL (shift_amount) & 31;
3023 /* Special case for shift count of 31: use and-rotl sequence. */
3024 if (shift_amount_i == 31)
3025 return true;
3027 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3028 & ASHL_CLOBBERS_T) != 0;
3031 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3032 instructions will clobber the T bit. */
3033 bool
3034 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3036 gcc_assert (CONST_INT_P (shift_amount));
3038 const int shift_amount_i = INTVAL (shift_amount) & 31;
3040 /* Special case for shift count of 31: use shll-movt sequence. */
3041 if (shift_amount_i == 31)
3042 return true;
3044 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3045 & LSHR_CLOBBERS_T) != 0;
3048 /* Return true if it is potentially beneficial to use a dynamic shift
3049 instruction (shad / shar) instead of a combination of 1/2/8/16
3050 shift instructions for the specified shift count.
3051 If dynamic shifts are not available, always return false. */
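/* Rough trade-off: a constant shift by 5 takes 3 insns via ashl_lshr_seq,
   so when dynamic shifts are available it is worth loading the count and
   using shad / shld instead whenever 3 > 1 + SH_DYNAMIC_SHIFT_COST.  */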
3052 bool
3053 sh_dynamicalize_shift_p (rtx count)
3055 gcc_assert (CONST_INT_P (count));
3057 const int shift_amount_i = INTVAL (count) & 31;
3058 int insn_count;
3060 /* For left and right shifts, there are shorter 2 insn sequences for
3061 shift amounts of 31. */
3062 if (shift_amount_i == 31)
3063 insn_count = 2;
3064 else
3065 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3067 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3070 /* Assuming we have a value that has been sign-extended by at least one bit,
3071 can we use the ext_ashl_lshr_seq amounts with the last shift turned to an
3072 arithmetic shift to shift it by N without data loss, and quicker than by
3073 other means? */
3074 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
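/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, which correspond
   to the { 8, -1 } and { 16, -1 } entries above, where the final 1-bit
   right shift can be made arithmetic without losing the sign.  */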
3076 /* Return the cost of a shift. */
3077 static inline int
3078 shiftcosts (rtx x)
3080 int value;
3082 if (TARGET_SHMEDIA)
3083 return 1;
3085 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3087 if (GET_MODE (x) == DImode
3088 && CONST_INT_P (XEXP (x, 1))
3089 && INTVAL (XEXP (x, 1)) == 1)
3090 return 2;
3092 /* Everything else is invalid, because there is no pattern for it. */
3093 return -1;
3095 /* If shift by a non constant, then this will be expensive. */
3096 if (!CONST_INT_P (XEXP (x, 1)))
3097 return SH_DYNAMIC_SHIFT_COST;
3099 /* Otherwise, return the true cost in instructions. Cope with out of range
3100 shift counts more or less arbitrarily. */
3101 value = INTVAL (XEXP (x, 1)) & 31;
3103 if (GET_CODE (x) == ASHIFTRT)
3105 int cost = ashiftrt_insns[value];
3106 /* If dynamic shifts are available and profitable in this case, then we
3107 put the constant in a reg and use shad. */
3108 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3109 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3110 return cost;
3112 else
3113 return ashl_lshr_seq[value].insn_count;
3116 /* Return the cost of an AND/XOR/IOR operation. */
3117 static inline int
3118 and_xor_ior_costs (rtx x, int code)
3120 /* On SH1-4 we have only max. SImode operations.
3121 Double the cost for modes > SImode. */
3122 const int cost_scale = !TARGET_SHMEDIA
3123 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3124 ? 2 : 1;
3126 /* A logical operation with two registers is a single cycle
3127 instruction. */
3128 if (!CONST_INT_P (XEXP (x, 1)))
3129 return 1 * cost_scale;
3131 int i = INTVAL (XEXP (x, 1));
3133 if (TARGET_SHMEDIA)
3135 if (satisfies_constraint_I10 (XEXP (x, 1))
3136 || satisfies_constraint_J16 (XEXP (x, 1)))
3137 return 1;
3138 else
3139 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3142 /* These constants are single cycle extu.[bw] instructions. */
3143 if ((i == 0xff || i == 0xffff) && code == AND)
3144 return 1 * cost_scale;
3145 /* Constants that can be used in an instruction as an immediate are
3146 a single cycle, but this requires r0, so make it a little more
3147 expensive. */
3148 if (CONST_OK_FOR_K08 (i))
3149 return 2 * cost_scale;
3150 /* Constants that can be loaded with a mov immediate need one more cycle.
3151 This case is probably unnecessary. */
3152 if (CONST_OK_FOR_I08 (i))
3153 return 2 * cost_scale;
3154 /* Any other constant requires an additional 2 cycle pc-relative load.
3155 This case is probably unnecessary. */
3156 return 3 * cost_scale;
3159 /* Return the cost of an addition or a subtraction. */
3160 static inline int
3161 addsubcosts (rtx x)
3163 if (GET_MODE (x) == SImode)
3165 /* The addc or subc patterns will eventually become one or two
3166 instructions. Below are some costs for some of the patterns
3167 which combine would reject because the costs of the individual
3168 insns in the patterns are lower.
3170 FIXME: It would be much easier if we had something like insn cost
3171 attributes and the cost calculation machinery used those attributes
3172 in the first place. This would eliminate redundant recog-like C
3173 code to calculate costs of complex patterns. */
3174 rtx op0 = XEXP (x, 0);
3175 rtx op1 = XEXP (x, 1);
3177 if (GET_CODE (x) == PLUS)
3179 if (GET_CODE (op0) == AND
3180 && XEXP (op0, 1) == const1_rtx
3181 && (GET_CODE (op1) == PLUS
3182 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3183 return 1;
3185 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3186 && GET_CODE (op1) == LSHIFTRT
3187 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3188 return 1;
3192 /* On SH1-4 we have only max. SImode operations.
3193 Double the cost for modes > SImode. */
3194 const int cost_scale = !TARGET_SHMEDIA
3195 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3196 ? 2 : 1;
3198 /* Adding a register is a single cycle insn. */
3199 if (REG_P (XEXP (x, 1))
3200 || GET_CODE (XEXP (x, 1)) == SUBREG)
3201 return 1 * cost_scale;
3203 /* Likewise for small constants. */
3204 if (CONST_INT_P (XEXP (x, 1))
3205 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3206 return 1 * cost_scale;
3208 if (TARGET_SHMEDIA)
3209 switch (GET_CODE (XEXP (x, 1)))
3211 case CONST:
3212 case LABEL_REF:
3213 case SYMBOL_REF:
3214 return TARGET_SHMEDIA64 ? 5 : 3;
3216 case CONST_INT:
3217 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3218 return 2;
3219 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3220 return 3;
3221 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3222 return 4;
3224 /* Fall through. */
3225 default:
3226 return 5;
3229 /* Any other constant requires a 2 cycle pc-relative load plus an
3230 addition. */
3231 return 3 * cost_scale;
3234 /* Return the cost of a multiply. */
3235 static inline int
3236 multcosts (rtx x ATTRIBUTE_UNUSED)
3238 if (sh_multcost >= 0)
3239 return sh_multcost;
3240 if (TARGET_SHMEDIA)
3241 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3242 accept constants. Ideally, we would use a cost of one or two and
3243 add the cost of the operand, but disregard the latter when inside loops
3244 and loop invariant code motion is still to follow.
3245 Using a multiply first and splitting it later if it's a loss
3246 doesn't work because of different sign / zero extension semantics
3247 of multiplies vs. shifts. */
3248 return optimize_size ? 2 : 3;
3250 if (TARGET_SH2)
3252 /* We have a mul insn, so we can never take more than the mul and the
3253 read of the mac reg, but count more because of the latency and extra
3254 reg usage. */
3255 if (optimize_size)
3256 return 2;
3257 return 3;
3260 /* If we're aiming at small code, then just count the number of
3261 insns in a multiply call sequence. */
3262 if (optimize_size)
3263 return 5;
3265 /* Otherwise count all the insns in the routine we'd be calling too. */
3266 return 20;
3269 /* Compute a (partial) cost for rtx X. Return true if the complete
3270 cost has been computed, and false if subexpressions should be
3271 scanned. In either case, *TOTAL contains the cost result. */
3272 static bool
3273 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3274 int *total, bool speed ATTRIBUTE_UNUSED)
3276 switch (code)
3278 /* The lower-subreg pass decides whether to split multi-word regs
3279 into individual regs by looking at the cost for a SET of certain
3280 modes with the following patterns:
3281 (set (reg) (reg))
3282 (set (reg) (const_int 0))
3283 On machines that support vector-move operations a multi-word move
3284 is the same cost as individual reg move. On SH there is no
3285 vector-move, so we have to provide the correct cost in the number
3286 of move insns to load/store the reg of the mode in question. */
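/* For example, a DImode (set (reg) (reg)) has GET_MODE_SIZE == 8 and
   mov_insn_size == 4, so it is reported as COSTS_N_INSNS (2), i.e. two
   SImode moves.  */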
3287 case SET:
3288 if (register_operand (SET_DEST (x), VOIDmode)
3289 && (register_operand (SET_SRC (x), VOIDmode)
3290 || satisfies_constraint_Z (SET_SRC (x))))
3292 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3293 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3294 / mov_insn_size (mode, TARGET_SH2A));
3295 return true;
3297 return false;
3299 /* The cost of a mem access is mainly the cost of the address mode. */
3300 case MEM:
3301 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3302 true);
3303 return true;
3305 /* The cost of a sign or zero extend depends on whether the source is a
3306 reg or a mem. In case of a mem take the address into account. */
3307 case SIGN_EXTEND:
3308 if (REG_P (XEXP (x, 0)))
3310 *total = COSTS_N_INSNS (1);
3311 return true;
3313 if (MEM_P (XEXP (x, 0)))
3315 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3316 GET_MODE (XEXP (x, 0)),
3317 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3318 return true;
3320 return false;
3322 case ZERO_EXTEND:
3323 if (REG_P (XEXP (x, 0)))
3325 *total = COSTS_N_INSNS (1);
3326 return true;
3328 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3329 && (GET_MODE (XEXP (x, 0)) == QImode
3330 || GET_MODE (XEXP (x, 0)) == HImode))
3332 /* Handle SH2A's movu.b and movu.w insn. */
3333 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3334 GET_MODE (XEXP (x, 0)),
3335 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3336 return true;
3338 return false;
3340 /* mems for SFmode and DFmode can be inside a parallel due to
3341 the way the fpscr is handled. */
3342 case PARALLEL:
3343 for (int i = 0; i < XVECLEN (x, 0); i++)
3345 rtx xx = XVECEXP (x, 0, i);
3346 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3348 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3349 GET_MODE (XEXP (xx, 0)),
3350 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3351 return true;
3353 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3355 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3356 GET_MODE (XEXP (xx, 1)),
3357 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3358 return true;
3362 if (sh_1el_vec (x, VOIDmode))
3363 *total = outer_code != SET;
3364 else if (sh_rep_vec (x, VOIDmode))
3365 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3366 + (outer_code != SET));
3367 else
3368 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3369 return true;
3371 case CONST_INT:
3372 if (TARGET_SHMEDIA)
3374 if (INTVAL (x) == 0)
3375 *total = 0;
3376 else if (outer_code == AND && and_operand ((x), DImode))
3377 *total = 0;
3378 else if ((outer_code == IOR || outer_code == XOR
3379 || outer_code == PLUS)
3380 && CONST_OK_FOR_I10 (INTVAL (x)))
3381 *total = 0;
3382 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3383 *total = COSTS_N_INSNS (outer_code != SET);
3384 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3385 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3386 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3387 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3388 else
3389 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3390 return true;
3392 if (CONST_OK_FOR_I08 (INTVAL (x)))
3393 *total = 0;
3394 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3395 && CONST_OK_FOR_K08 (INTVAL (x)))
3396 *total = 1;
3397 /* prepare_cmp_insn will force costly constants into registers before
3398 the cbranch[sd]i4 patterns can see them, so preserve potentially
3399 interesting ones not covered by I08 above. */
3400 else if (outer_code == COMPARE
3401 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3402 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3403 || INTVAL (x) == 0x7fffffff
3404 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3405 *total = 1;
3406 else
3407 *total = 8;
3408 return true;
3410 case EQ:
3411 /* An and with a constant compared against zero is
3412 most likely going to be a TST #imm, R0 instruction.
3413 Notice that this does not catch the zero_extract variants from
3414 the md file. */
3415 if (GET_CODE (XEXP (x, 0)) == AND
3416 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3418 *total = 1;
3419 return true;
3421 else
3422 return false;
3424 case SMIN:
3425 case SMAX:
3426 /* This is most likely a clips.b or clips.w insn that is being made up
3427 by combine. */
3428 if (TARGET_SH2A
3429 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3430 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3431 && REG_P (XEXP (XEXP (x, 0), 0))
3432 && CONST_INT_P (XEXP (x, 1)))
3434 *total = COSTS_N_INSNS (1);
3435 return true;
3437 else
3438 return false;
3440 case CONST:
3441 case LABEL_REF:
3442 case SYMBOL_REF:
3443 if (TARGET_SHMEDIA64)
3444 *total = COSTS_N_INSNS (4);
3445 else if (TARGET_SHMEDIA32)
3446 *total = COSTS_N_INSNS (2);
3447 else
3448 *total = 5;
3449 return true;
3451 case CONST_DOUBLE:
3452 if (TARGET_SHMEDIA)
3453 *total = COSTS_N_INSNS (4);
3454 /* prepare_cmp_insn will force costly constants into registers before
3455 the cbranchdi4 pattern can see them, so preserve potentially
3456 interesting ones. */
3457 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3458 *total = 1;
3459 else
3460 *total = 10;
3461 return true;
3463 case CONST_VECTOR:
3464 /* FIXME: Probably this could be folded with the PARALLEL case? */
3465 if (x == CONST0_RTX (GET_MODE (x)))
3466 *total = 0;
3467 else if (sh_1el_vec (x, VOIDmode))
3468 *total = outer_code != SET;
3469 else if (sh_rep_vec (x, VOIDmode))
3470 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3471 + (outer_code != SET));
3472 else
3473 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3474 return true;
3476 case PLUS:
3477 case MINUS:
3478 *total = COSTS_N_INSNS (addsubcosts (x));
3479 return true;
3481 case AND:
3482 case XOR:
3483 case IOR:
3484 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3485 return true;
3487 case MULT:
3488 *total = COSTS_N_INSNS (multcosts (x));
3489 return true;
3491 case LT:
3492 case GE:
3493 /* div0s sign comparison. */
3494 if (GET_CODE (XEXP (x, 0)) == XOR
3495 && REG_P ((XEXP (XEXP (x, 0), 0)))
3496 && REG_P ((XEXP (XEXP (x, 0), 1)))
3497 && satisfies_constraint_Z (XEXP (x, 1)))
3499 *total = COSTS_N_INSNS (1);
3500 return true;
3502 else
3503 return false;
3505 case LSHIFTRT:
3506 /* div0s sign comparison. */
3507 if (GET_CODE (XEXP (x, 0)) == XOR
3508 && REG_P ((XEXP (XEXP (x, 0), 0)))
3509 && REG_P ((XEXP (XEXP (x, 0), 1)))
3510 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3512 *total = COSTS_N_INSNS (1);
3513 return true;
3515 /* Fall through to shiftcosts. */
3516 case ASHIFT:
3517 case ASHIFTRT:
3519 int cost = shiftcosts (x);
3520 if (cost < 0)
3521 return false;
3522 *total = COSTS_N_INSNS (cost);
3523 return true;
3526 case DIV:
3527 case UDIV:
3528 case MOD:
3529 case UMOD:
3530 *total = COSTS_N_INSNS (20);
3531 return true;
3533 case FLOAT:
3534 case FIX:
3535 *total = 100;
3536 return true;
3538 default:
3539 return false;
3543 /* Determine the size of the fundamental move insn that will be used
3544 for the specified mode. */
3545 static inline int
3546 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3548 const int mode_sz = GET_MODE_SIZE (mode);
3550 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3551 || (TARGET_FMOVD && mode == DFmode))
3552 return mode_sz;
3553 else
3555 /* The max. available mode for actual move insns is SImode.
3556 Larger accesses will be split into multiple loads/stores. */
3557 const int max_mov_sz = GET_MODE_SIZE (SImode);
3558 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3562 /* Determine the maximum possible displacement for a move insn for the
3563 specified mode. */
3564 static int
3565 max_mov_insn_displacement (enum machine_mode mode, bool consider_sh2a)
3567 /* The 4 byte displacement move insns are the same as the 2 byte
3568 versions but take a 12 bit displacement. All we need to do is to
3569 scale the max. displacement value accordingly. */
3570 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3572 /* SH2A supports FPU move insns with 12 bit displacements.
3573 Other variants do not support any kind of displacements for
3574 FPU move insns. */
3575 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3576 return 0;
3577 else
3579 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3580 const int mode_sz = GET_MODE_SIZE (mode);
3581 int r = 15 * mov_insn_sz * disp_scale;
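/* E.g. for SImode, mov_insn_sz is 4: the plain insn reaches
   15 * 4 = 60 bytes, while the SH2A variant reaches
   15 * 4 * (4095 / 15) = 16380 bytes, i.e. the full 12 bit displacement
   in 4 byte units.  */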
3583 /* If the mov insn will be split into multiple loads/stores, the
3584 maximum possible displacement is a bit smaller. */
3585 if (mode_sz > mov_insn_sz)
3586 r -= mode_sz - mov_insn_sz;
3587 return r;
3591 /* Determine the alignment mask for a move insn of the
3592 specified mode. */
3593 static inline int
3594 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3596 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3597 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3600 /* Return the displacement value of a displacement address. */
3601 static inline HOST_WIDE_INT
3602 disp_addr_displacement (rtx x)
3604 gcc_assert (satisfies_constraint_Sdd (x));
3605 return INTVAL (XEXP (XEXP (x, 0), 1));
3608 /* Compute the cost of an address. */
3609 static int
3610 sh_address_cost (rtx x, enum machine_mode mode,
3611 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3613 /* 'GBR + 0'. Account one more because of R0 restriction. */
3614 if (REG_P (x) && REGNO (x) == GBR_REG)
3615 return 2;
3617 /* Simple reg, post-inc, pre-dec addressing. */
3618 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3619 return 1;
3621 /* 'reg + disp' addressing. */
3622 if (GET_CODE (x) == PLUS
3623 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3625 /* 'GBR + disp'. Account one more because of R0 restriction. */
3626 if (REGNO (XEXP (x, 0)) == GBR_REG
3627 && gbr_displacement (XEXP (x, 1), mode))
3628 return 2;
3630 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3632 if (offset == 0)
3633 return 1;
3635 /* The displacement would fit into a 2 byte move insn.
3636 HImode and QImode loads/stores with displacement put pressure on
3637 R0 which will most likely require another reg copy. Thus account
3638 a higher cost for that. */
3639 if (offset > 0 && offset <= max_mov_insn_displacement (mode, false))
3640 return (mode == HImode || mode == QImode) ? 2 : 1;
3642 /* The displacement would fit into a 4 byte move insn (SH2A). */
3643 if (TARGET_SH2A
3644 && offset > 0 && offset <= max_mov_insn_displacement (mode, true))
3645 return 2;
3647 /* The displacement is probably out of range and will require extra
3648 calculations. */
3649 return 3;
3652 /* 'reg + reg' addressing. Account a slightly higher cost because of
3653 increased pressure on R0. */
3654 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3655 && ! TARGET_SHMEDIA)
3656 return 3;
3658 /* Not sure what it is - probably expensive. */
3659 return 10;
3662 /* Code to expand a shift. */
3663 static void
3664 gen_ashift (int type, int n, rtx reg)
3666 rtx n_rtx;
3668 /* Negative values here come from the shift_amounts array. */
3669 if (n < 0)
3671 if (type == ASHIFT)
3672 type = LSHIFTRT;
3673 else
3674 type = ASHIFT;
3675 n = -n;
3678 n_rtx = GEN_INT (n);
3679 gcc_assert (satisfies_constraint_P27 (n_rtx));
3681 switch (type)
3683 case ASHIFTRT:
3684 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3685 break;
3686 case LSHIFTRT:
3687 if (n == 1)
3688 emit_insn (gen_shlr (reg, reg));
3689 else
3690 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3691 break;
3692 case ASHIFT:
3693 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3694 break;
3695 default:
3696 gcc_unreachable ();
3700 /* Code to expand a HImode shift. */
3701 static void
3702 gen_ashift_hi (int type, int n, rtx reg)
3704 /* Negative values here come from the shift_amounts array. */
3705 if (n < 0)
3707 if (type == ASHIFT)
3708 type = LSHIFTRT;
3709 else
3710 type = ASHIFT;
3711 n = -n;
3714 switch (type)
3716 case ASHIFTRT:
3717 case LSHIFTRT:
3718 /* We don't have HImode right shift operations because using the
3719 ordinary 32 bit shift instructions for that doesn't generate proper
3720 zero/sign extension.
3721 gen_ashift_hi is only called in contexts where we know that the
3722 sign extension works out correctly. */
3724 int offset = 0;
3725 if (GET_CODE (reg) == SUBREG)
3727 offset = SUBREG_BYTE (reg);
3728 reg = SUBREG_REG (reg);
3730 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3731 break;
3733 case ASHIFT:
3734 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3735 break;
3739 /* Output RTL to split a constant shift into its component SH constant
3740 shift instructions. */
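/* For example, a constant left shift by 10 is typically emitted as
   shll8 followed by shll2; the exact sequence is taken from the
   ashl_lshr_seq table.  */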
3741 void
3742 gen_shifty_op (int code, rtx *operands)
3744 int value = INTVAL (operands[2]);
3745 int max, i;
3747 /* Truncate the shift count in case it is out of bounds. */
3748 value = value & 31;
3750 if (value == 31)
3752 if (code == LSHIFTRT)
3754 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3755 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3756 return;
3758 else if (code == ASHIFT)
3760 /* There is a two instruction sequence for 31 bit left shifts,
3761 but it requires r0. */
3762 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3764 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3765 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3766 return;
3770 else if (value == 0)
3772 /* This can happen even when optimizing, if there were subregs before
3773 reload. Don't output a nop here, as this is never optimized away;
3774 use a no-op move instead. */
3775 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3776 return;
3779 max = ashl_lshr_seq[value].insn_count;
3780 for (i = 0; i < max; i++)
3781 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3784 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3785 don't matter. */
3786 void
3787 gen_shifty_hi_op (int code, rtx *operands)
3789 int value = INTVAL (operands[2]);
3790 int max, i;
3791 void (*gen_fun) (int, int, rtx);
3793 /* This operation is used by and_shl for SImode values with a few
3794 high bits known to be cleared. */
3795 value &= 31;
3796 if (value == 0)
3798 emit_insn (gen_nop ());
3799 return;
3802 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3803 if (code == ASHIFT)
3805 max = ext_ashl_lshr_seq[value].insn_count;
3806 for (i = 0; i < max; i++)
3807 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3809 else
3810 /* When shifting right, emit the shifts in reverse order, so that
3811 solitary negative values come first. */
3812 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3813 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3816 /* Output RTL for an arithmetic right shift.
3817 ??? Rewrite to use super-optimizer sequences. */
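/* Summary of the strategy below: a shift count of 31 uses the sign bit
   trick, counts 16..19 use the 16 bit shift helper plus single shifts,
   small counts (<= 5) are emitted as single shifts, dynamic shifts are
   used where profitable, and everything else calls one of the static
   __ashiftrt_r4_<n> helper routines.  */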
3818 bool
3819 expand_ashiftrt (rtx *operands)
3821 rtx wrk;
3822 char func[18];
3823 int value;
3825 if (TARGET_DYNSHIFT)
3827 if (!CONST_INT_P (operands[2]))
3829 rtx count = copy_to_mode_reg (SImode, operands[2]);
3830 emit_insn (gen_negsi2 (count, count));
3831 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3832 return true;
3834 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3835 > 1 + SH_DYNAMIC_SHIFT_COST)
3837 rtx count
3838 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3839 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3840 return true;
3843 if (!CONST_INT_P (operands[2]))
3844 return false;
3846 value = INTVAL (operands[2]) & 31;
3848 if (value == 31)
3850 /* If we are called from abs expansion, arrange things so that we
3851 can use a single MT instruction that doesn't clobber the source,
3852 if LICM can hoist out the load of the constant zero. */
3853 if (currently_expanding_to_rtl)
3855 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3856 operands[1]));
3857 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3858 return true;
3860 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3861 return true;
3863 else if (value >= 16 && value <= 19)
3865 wrk = gen_reg_rtx (SImode);
3866 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3867 value -= 16;
3868 while (value--)
3869 gen_ashift (ASHIFTRT, 1, wrk);
3870 emit_move_insn (operands[0], wrk);
3871 return true;
3873 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3874 else if (value <= 5)
3876 wrk = gen_reg_rtx (SImode);
3877 emit_move_insn (wrk, operands[1]);
3878 while (value--)
3879 gen_ashift (ASHIFTRT, 1, wrk);
3880 emit_move_insn (operands[0], wrk);
3881 return true;
3884 wrk = gen_reg_rtx (Pmode);
3886 /* Load the value into an arg reg and call a helper. */
3887 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3888 sprintf (func, "__ashiftrt_r4_%d", value);
3889 function_symbol (wrk, func, SFUNC_STATIC);
3890 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3891 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3892 return true;
3895 /* Try to find a good way to implement the combiner pattern
3896 [(set (match_operand:SI 0 "register_operand" "r")
3897 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3898 (match_operand:SI 2 "const_int_operand" "n"))
3899 (match_operand:SI 3 "const_int_operand" "n"))) .
3900 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3901 return 0 for simple right / left or left/right shift combination.
3902 return 1 for a combination of shifts with zero_extend.
3903 return 2 for a combination of shifts with an AND that needs r0.
3904 return 3 for a combination of shifts with an AND that needs an extra
3905 scratch register, when the three highmost bits of the AND mask are clear.
3906 return 4 for a combination of shifts with an AND that needs an extra
3907 scratch register, when any of the three highmost bits of the AND mask
3908 is set.
3909 If ATTRP is set, store an initial right shift width in ATTRP[0],
3910 and the instruction length in ATTRP[1]. These values are not valid
3911 when returning 0.
3912 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3913 shift_amounts for the last shift value that is to be used before the
3914 sign extend. */
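/* Illustrative example (qualitative only): for LEFT_RTX = 8 and
   MASK_RTX = 0xff00 the mask on the unshifted source is 0xff, so the
   value can be formed by a plain shift pair, by a zero-extend plus
   shift, or by an r0 AND plus shift; which kind is returned depends on
   the insn counts recorded in ashl_lshr_seq / ext_ashl_lshr_seq.  */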
3916 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3918 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3919 int left = INTVAL (left_rtx), right;
3920 int best = 0;
3921 int cost, best_cost = 10000;
3922 int best_right = 0, best_len = 0;
3923 int i;
3924 int can_ext;
3926 if (left < 0 || left > 31)
3927 return 0;
3928 if (CONST_INT_P (mask_rtx))
3929 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3930 else
3931 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3932 /* Can this be expressed as a right shift / left shift pair? */
3933 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3934 right = exact_log2 (lsb);
3935 mask2 = ~(mask + lsb - 1);
3936 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3937 /* mask has no zeroes except for trailing zeroes <==> ! mask2 */
3938 if (! mask2)
3939 best_cost = ashl_lshr_seq[right].insn_count
3940 + ashl_lshr_seq[right + left].insn_count;
3941 /* mask has no trailing zeroes <==> ! right */
3942 else if (! right && mask2 == ~(lsb2 - 1))
3944 int late_right = exact_log2 (lsb2);
3945 best_cost = ashl_lshr_seq[left + late_right].insn_count
3946 + ashl_lshr_seq[late_right].insn_count;
3948 /* Try to use zero extend. */
3949 if (mask2 == ~(lsb2 - 1))
3951 int width, first;
3953 for (width = 8; width <= 16; width += 8)
3955 /* Can we zero-extend right away? */
3956 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3958 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3959 + ext_ashl_lshr_seq[left + right].insn_count;
3960 if (cost < best_cost)
3962 best = 1;
3963 best_cost = cost;
3964 best_right = right;
3965 best_len = cost;
3966 if (attrp)
3967 attrp[2] = -1;
3969 continue;
3971 /* ??? Could try to put zero extend into initial right shift,
3972 or even shift a bit left before the right shift. */
3973 /* Determine value of first part of left shift, to get to the
3974 zero extend cut-off point. */
3975 first = width - exact_log2 (lsb2) + right;
3976 if (first >= 0 && right + left - first >= 0)
3978 cost = ext_ashl_lshr_seq[right].insn_count
3979 + ext_ashl_lshr_seq[first].insn_count + 1
3980 + ext_ashl_lshr_seq[right + left - first].insn_count;
3982 if (cost < best_cost)
3984 best = 1;
3985 best_cost = cost;
3986 best_right = right;
3987 best_len = cost;
3988 if (attrp)
3989 attrp[2] = first;
3994 /* Try to use r0 AND pattern. */
3995 for (i = 0; i <= 2; i++)
3997 if (i > right)
3998 break;
3999 if (! CONST_OK_FOR_K08 (mask >> i))
4000 continue;
4001 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4002 if (cost < best_cost)
4004 best = 2;
4005 best_cost = cost;
4006 best_right = i;
4007 best_len = cost - 1;
4010 /* Try to use a scratch register to hold the AND operand. */
4011 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4012 for (i = 0; i <= 2; i++)
4014 if (i > right)
4015 break;
4016 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4017 + (can_ext
4018 ? ext_ashl_lshr_seq
4019 : ashl_lshr_seq)[left + i].insn_count;
4020 if (cost < best_cost)
4022 best = 4 - can_ext;
4023 best_cost = cost;
4024 best_right = i;
4025 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4029 if (attrp)
4031 attrp[0] = best_right;
4032 attrp[1] = best_len;
4034 return best;
4037 /* This is used in length attributes of the unnamed instructions
4038 corresponding to shl_and_kind return values of 1 and 2. */
4040 shl_and_length (rtx insn)
4042 rtx set_src, left_rtx, mask_rtx;
4043 int attributes[3];
4045 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4046 left_rtx = XEXP (XEXP (set_src, 0), 1);
4047 mask_rtx = XEXP (set_src, 1);
4048 shl_and_kind (left_rtx, mask_rtx, attributes);
4049 return attributes[1];
4052 /* This is used in length attribute of the and_shl_scratch instruction. */
4054 shl_and_scr_length (rtx insn)
4056 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4057 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4058 rtx op = XEXP (set_src, 0);
4059 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4060 op = XEXP (XEXP (op, 0), 0);
4061 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4064 /* Generate rtl for instructions for which shl_and_kind advised a particular
4065 method of generating them, i.e. returned nonzero. */
4066 bool
4067 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4069 int attributes[3];
4070 unsigned HOST_WIDE_INT mask;
4071 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4072 int right, total_shift;
4073 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4075 right = attributes[0];
4076 total_shift = INTVAL (left_rtx) + right;
4077 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4078 switch (kind)
4080 default:
4081 return true;
4082 case 1:
4084 int first = attributes[2];
4085 rtx operands[3];
4087 if (first < 0)
4089 emit_insn ((mask << right) <= 0xff
4090 ? gen_zero_extendqisi2 (dest,
4091 gen_lowpart (QImode, source))
4092 : gen_zero_extendhisi2 (dest,
4093 gen_lowpart (HImode, source)));
4094 source = dest;
4096 if (source != dest)
4097 emit_insn (gen_movsi (dest, source));
4098 operands[0] = dest;
4099 if (right)
4101 operands[2] = GEN_INT (right);
4102 gen_shifty_hi_op (LSHIFTRT, operands);
4104 if (first > 0)
4106 operands[2] = GEN_INT (first);
4107 gen_shifty_hi_op (ASHIFT, operands);
4108 total_shift -= first;
4109 mask <<= first;
4111 if (first >= 0)
4112 emit_insn (mask <= 0xff
4113 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4114 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4115 if (total_shift > 0)
4117 operands[2] = GEN_INT (total_shift);
4118 gen_shifty_hi_op (ASHIFT, operands);
4120 break;
4122 case 4:
4123 shift_gen_fun = gen_shifty_op;
4124 case 3:
4125 /* If the topmost bit that matters is set, set the topmost bits
4126 that don't matter. This way, we might be able to get a shorter
4127 signed constant. */
4128 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4129 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4130 case 2:
4131 /* Don't expand fine-grained when combining, because that will
4132 make the pattern fail. */
4133 if (currently_expanding_to_rtl
4134 || reload_in_progress || reload_completed)
4136 rtx operands[3];
4138 /* Cases 3 and 4 should be handled by this split
4139 only while combining */
4140 gcc_assert (kind <= 2);
4141 if (right)
4143 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4144 source = dest;
4146 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4147 if (total_shift)
4149 operands[0] = dest;
4150 operands[1] = dest;
4151 operands[2] = GEN_INT (total_shift);
4152 shift_gen_fun (ASHIFT, operands);
4154 break;
4156 else
4158 int neg = 0;
4159 if (kind != 4 && total_shift < 16)
4161 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4162 if (neg > 0)
4163 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4164 else
4165 neg = 0;
4167 emit_insn (gen_and_shl_scratch (dest, source,
4168 GEN_INT (right),
4169 GEN_INT (mask),
4170 GEN_INT (total_shift + neg),
4171 GEN_INT (neg)));
4172 emit_insn (gen_movsi (dest, dest));
4173 break;
4176 return false;
4179 /* Try to find a good way to implement the combiner pattern
4180 [(set (match_operand:SI 0 "register_operand" "=r")
4181 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4182 (match_operand:SI 2 "const_int_operand" "n")
4183 (match_operand:SI 3 "const_int_operand" "n")
4184 (const_int 0)))
4185 (clobber (reg:SI T_REG))]
4186 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4187 return 0 for simple left / right shift combination.
4188 return 1 for left shift / 8 bit sign extend / left shift.
4189 return 2 for left shift / 16 bit sign extend / left shift.
4190 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4191 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4192 return 5 for left shift / 16 bit sign extend / right shift
4193 return 6 for < 8 bit sign extend / left shift.
4194 return 7 for < 8 bit sign extend / left shift / single right shift.
4195 If COSTP is nonzero, assign the calculated cost to *COSTP. */
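/* Illustrative example: for LEFT_RTX = 16 and SIZE_RTX = 24 we have
   INSIZE = 8, so an 8 bit sign extend combined with a left shift
   competes with the default "shift fully left, then arithmetic shift
   right" pair; the cheaper variant according to the shift tables is
   chosen.  */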
4197 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4199 int left, size, insize, ext;
4200 int cost = 0, best_cost;
4201 int kind;
4203 left = INTVAL (left_rtx);
4204 size = INTVAL (size_rtx);
4205 insize = size - left;
4206 gcc_assert (insize > 0);
4207 /* Default to left / right shift. */
4208 kind = 0;
4209 best_cost = ashl_lshr_seq[32 - insize].insn_count
4210 + ashl_lshr_seq[32 - size].insn_count;
4211 if (size <= 16)
4213 /* 16 bit shift / sign extend / 16 bit shift */
4214 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4215 + ashl_lshr_seq[16 - size].insn_count;
4216 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4217 below, by alternative 3 or something even better. */
4218 if (cost < best_cost)
4220 kind = 5;
4221 best_cost = cost;
4224 /* Try a plain sign extend between two shifts. */
4225 for (ext = 16; ext >= insize; ext -= 8)
4227 if (ext <= size)
4229 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4230 + ashl_lshr_seq[size - ext].insn_count;
4231 if (cost < best_cost)
4233 kind = ext / (unsigned) 8;
4234 best_cost = cost;
4237 /* Check if we can do a sloppy shift with a final signed shift
4238 restoring the sign. */
4239 if (EXT_SHIFT_SIGNED (size - ext))
4240 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4241 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4242 /* If not, maybe it's still cheaper to do the second shift sloppy,
4243 and do a final sign extend? */
4244 else if (size <= 16)
4245 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4246 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4247 + 1;
4248 else
4249 continue;
4250 if (cost < best_cost)
4252 kind = ext / (unsigned) 8 + 2;
4253 best_cost = cost;
4256 /* Check if we can sign extend in r0. */
4257 if (insize < 8)
4259 cost = 3 + ashl_lshr_seq[left].insn_count;
4260 if (cost < best_cost)
4262 kind = 6;
4263 best_cost = cost;
4265 /* Try the same with a final signed shift. */
4266 if (left < 31)
4268 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4269 if (cost < best_cost)
4271 kind = 7;
4272 best_cost = cost;
4276 if (TARGET_DYNSHIFT)
4278 /* Try to use a dynamic shift. */
4279 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4280 if (cost < best_cost)
4282 kind = 0;
4283 best_cost = cost;
4286 if (costp)
4287 *costp = cost;
4288 return kind;
4291 /* Function to be used in the length attribute of the instructions
4292 implementing this pattern. */
4294 shl_sext_length (rtx insn)
4296 rtx set_src, left_rtx, size_rtx;
4297 int cost;
4299 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4300 left_rtx = XEXP (XEXP (set_src, 0), 1);
4301 size_rtx = XEXP (set_src, 1);
4302 shl_sext_kind (left_rtx, size_rtx, &cost);
4303 return cost;
4306 /* Generate rtl for this pattern. */
4307 bool
4308 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4310 int kind;
4311 int left, size, insize, cost;
4312 rtx operands[3];
4314 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4315 left = INTVAL (left_rtx);
4316 size = INTVAL (size_rtx);
4317 insize = size - left;
4318 switch (kind)
4320 case 1:
4321 case 2:
4322 case 3:
4323 case 4:
4325 int ext = kind & 1 ? 8 : 16;
4326 int shift2 = size - ext;
4328 /* Don't expand fine-grained when combining, because that will
4329 make the pattern fail. */
4330 if (! currently_expanding_to_rtl
4331 && ! reload_in_progress && ! reload_completed)
4333 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4334 emit_insn (gen_movsi (dest, source));
4335 break;
4337 if (dest != source)
4338 emit_insn (gen_movsi (dest, source));
4339 operands[0] = dest;
4340 if (ext - insize)
4342 operands[2] = GEN_INT (ext - insize);
4343 gen_shifty_hi_op (ASHIFT, operands);
4345 emit_insn (kind & 1
4346 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4347 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4348 if (kind <= 2)
4350 if (shift2)
4352 operands[2] = GEN_INT (shift2);
4353 gen_shifty_op (ASHIFT, operands);
4356 else
4358 if (shift2 > 0)
4360 if (EXT_SHIFT_SIGNED (shift2))
4362 operands[2] = GEN_INT (shift2 + 1);
4363 gen_shifty_op (ASHIFT, operands);
4364 operands[2] = const1_rtx;
4365 gen_shifty_op (ASHIFTRT, operands);
4366 break;
4368 operands[2] = GEN_INT (shift2);
4369 gen_shifty_hi_op (ASHIFT, operands);
4371 else if (shift2)
4373 operands[2] = GEN_INT (-shift2);
4374 gen_shifty_hi_op (LSHIFTRT, operands);
4376 emit_insn (size <= 8
4377 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4378 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4380 break;
4382 case 5:
4384 int i = 16 - size;
4385 if (! currently_expanding_to_rtl
4386 && ! reload_in_progress && ! reload_completed)
4387 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4388 else
4390 operands[0] = dest;
4391 operands[2] = GEN_INT (16 - insize);
4392 gen_shifty_hi_op (ASHIFT, operands);
4393 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4395 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4396 while (--i >= 0)
4397 gen_ashift (ASHIFTRT, 1, dest);
4398 break;
4400 case 6:
4401 case 7:
4402 /* Don't expand fine-grained when combining, because that will
4403 make the pattern fail. */
4404 if (! currently_expanding_to_rtl
4405 && ! reload_in_progress && ! reload_completed)
4407 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4408 emit_insn (gen_movsi (dest, source));
4409 break;
4411 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4412 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4413 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4414 operands[0] = dest;
4415 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4416 gen_shifty_op (ASHIFT, operands);
4417 if (kind == 7)
4418 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4419 break;
4420 default:
4421 return true;
4423 return false;
4426 /* Prefix a symbol_ref name with "datalabel". */
4428 gen_datalabel_ref (rtx sym)
4430 const char *str;
4432 if (GET_CODE (sym) == LABEL_REF)
4433 return gen_rtx_CONST (GET_MODE (sym),
4434 gen_rtx_UNSPEC (GET_MODE (sym),
4435 gen_rtvec (1, sym),
4436 UNSPEC_DATALABEL));
4438 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4440 str = XSTR (sym, 0);
4441 /* Share all SYMBOL_REF strings with the same value - that is important
4442 for cse. */
4443 str = IDENTIFIER_POINTER (get_identifier (str));
4444 XSTR (sym, 0) = str;
4446 return sym;
4450 static alloc_pool label_ref_list_pool;
4452 typedef struct label_ref_list_d
4454 rtx label;
4455 struct label_ref_list_d *next;
4456 } *label_ref_list_t;
4458 /* The SH cannot load a large constant into a register, constants have to
4459 come from a pc relative load. The reference of a pc relative load
4460 instruction must be less than 1k in front of the instruction. This
4461 means that we often have to dump a constant inside a function, and
4462 generate code to branch around it.
4464 It is important to minimize this, since the branches will slow things
4465 down and make things bigger.
4467 Worst case code looks like:
4469 mov.l L1,rn
4470 bra L2
4472 align
4473 L1: .long value
4477 mov.l L3,rn
4478 bra L4
4480 align
4481 L3: .long value
4485 We fix this by performing a scan before scheduling, which notices which
4486 instructions need to have their operands fetched from the constant table
4487 and builds the table.
4489 The algorithm is:
4491 Scan to find an instruction which needs a pcrel move. Look forward, find the
4492 last barrier which is within MAX_COUNT bytes of the requirement.
4493 If there isn't one, make one. Process all the instructions between
4494 the found instruction and the barrier.
4496 In the above example, we can tell that L3 is within 1k of L1, so
4497 the first move can be shrunk from the 3 insn+constant sequence into
4498 just 1 insn, and the constant moved to L3 to make:
4500 mov.l L1,rn
4502 mov.l L3,rn
4503 bra L4
4505 align
4506 L3:.long value
4507 L4:.long value
4509 Then the second move becomes the target for the shortening process. */
4511 typedef struct
4513 rtx value; /* Value in table. */
4514 rtx label; /* Label of value. */
4515 label_ref_list_t wend; /* End of window. */
4516 enum machine_mode mode; /* Mode of value. */
4518 /* True if this constant is accessed as part of a post-increment
4519 sequence. Note that HImode constants are never accessed in this way. */
4520 bool part_of_sequence_p;
4521 } pool_node;
4523 /* The maximum number of constants that can fit into one pool, since
4524 constants in the range 0..510 are at least 2 bytes long, and in the
4525 range from there to 1018 at least 4 bytes. */
4527 #define MAX_POOL_SIZE 372
4528 static pool_node pool_vector[MAX_POOL_SIZE];
4529 static int pool_size;
4530 static rtx pool_window_label;
4531 static int pool_window_last;
4533 static int max_labelno_before_reorg;
4535 /* ??? If we need a constant in HImode which is the truncated value of a
4536 constant we need in SImode, we could combine the two entries thus saving
4537 two bytes. Is this common enough to be worth the effort of implementing
4538 it? */
4540 /* ??? This stuff should be done at the same time that we shorten branches.
4541 As it is now, we must assume that all branches are the maximum size, and
4542 this causes us to almost always output constant pools sooner than
4543 necessary. */
4545 /* Add a constant to the pool and return its label. */
4546 static rtx
4547 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4549 int i;
4550 rtx lab, new_rtx;
4551 label_ref_list_t ref, newref;
4553 /* First see if we've already got it. */
4554 for (i = 0; i < pool_size; i++)
4556 if (x->code == pool_vector[i].value->code
4557 && mode == pool_vector[i].mode)
4559 if (x->code == CODE_LABEL)
4561 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4562 continue;
4564 if (rtx_equal_p (x, pool_vector[i].value))
4566 lab = new_rtx = 0;
4567 if (! last_value
4568 || ! i
4569 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4571 new_rtx = gen_label_rtx ();
4572 LABEL_REFS (new_rtx) = pool_vector[i].label;
4573 pool_vector[i].label = lab = new_rtx;
4575 if (lab && pool_window_label)
4577 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4578 newref->label = pool_window_label;
4579 ref = pool_vector[pool_window_last].wend;
4580 newref->next = ref;
4581 pool_vector[pool_window_last].wend = newref;
4583 if (new_rtx)
4584 pool_window_label = new_rtx;
4585 pool_window_last = i;
4586 return lab;
4591 /* Need a new one. */
4592 pool_vector[pool_size].value = x;
4593 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4595 lab = 0;
4596 pool_vector[pool_size - 1].part_of_sequence_p = true;
4598 else
4599 lab = gen_label_rtx ();
4600 pool_vector[pool_size].mode = mode;
4601 pool_vector[pool_size].label = lab;
4602 pool_vector[pool_size].wend = NULL;
4603 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4604 if (lab && pool_window_label)
4606 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4607 newref->label = pool_window_label;
4608 ref = pool_vector[pool_window_last].wend;
4609 newref->next = ref;
4610 pool_vector[pool_window_last].wend = newref;
4612 if (lab)
4613 pool_window_label = lab;
4614 pool_window_last = pool_size;
4615 pool_size++;
4616 return lab;
4619 /* Output the literal table. START, if nonzero, is the first instruction
4620 this table is needed for, and also indicates that there is at least one
4621 casesi_worker_2 instruction. We have to emit the operand3 labels from
4622 these insns at a 4-byte aligned position. BARRIER is the barrier
4623 after which we are to place the table. */
4624 static void
4625 dump_table (rtx start, rtx barrier)
4627 rtx scan = barrier;
4628 int i;
4629 bool need_align = true;
4630 rtx lab;
4631 label_ref_list_t ref;
4632 bool have_df = false;
4634 /* Do two passes, first time dump out the HI sized constants. */
4636 for (i = 0; i < pool_size; i++)
4638 pool_node *p = &pool_vector[i];
4640 if (p->mode == HImode)
4642 if (need_align)
4644 scan = emit_insn_after (gen_align_2 (), scan);
4645 need_align = false;
4647 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4648 scan = emit_label_after (lab, scan);
4649 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4650 scan);
4651 for (ref = p->wend; ref; ref = ref->next)
4653 lab = ref->label;
4654 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4657 else if (p->mode == DFmode)
4658 have_df = true;
4661 need_align = true;
4663 if (start)
4665 scan = emit_insn_after (gen_align_4 (), scan);
4666 need_align = false;
4667 for (; start != barrier; start = NEXT_INSN (start))
4668 if (NONJUMP_INSN_P (start)
4669 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4671 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4672 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4674 scan = emit_label_after (lab, scan);
4677 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4679 rtx align_insn = NULL_RTX;
4681 scan = emit_label_after (gen_label_rtx (), scan);
4682 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4683 need_align = false;
4685 for (i = 0; i < pool_size; i++)
4687 pool_node *p = &pool_vector[i];
4689 switch (p->mode)
4691 case HImode:
4692 break;
4693 case SImode:
4694 case SFmode:
4695 if (align_insn && !p->part_of_sequence_p)
4697 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4698 emit_label_before (lab, align_insn);
4699 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4700 align_insn);
4701 for (ref = p->wend; ref; ref = ref->next)
4703 lab = ref->label;
4704 emit_insn_before (gen_consttable_window_end (lab),
4705 align_insn);
4707 delete_insn (align_insn);
4708 align_insn = NULL_RTX;
4709 continue;
4711 else
4713 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4714 scan = emit_label_after (lab, scan);
4715 scan = emit_insn_after (gen_consttable_4 (p->value,
4716 const0_rtx), scan);
4717 need_align = ! need_align;
4719 break;
4720 case DFmode:
4721 if (need_align)
4723 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4724 align_insn = scan;
4725 need_align = false;
4727 case DImode:
4728 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4729 scan = emit_label_after (lab, scan);
4730 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4731 scan);
4732 break;
4733 default:
4734 gcc_unreachable ();
4737 if (p->mode != HImode)
4739 for (ref = p->wend; ref; ref = ref->next)
4741 lab = ref->label;
4742 scan = emit_insn_after (gen_consttable_window_end (lab),
4743 scan);
4748 pool_size = 0;
4751 for (i = 0; i < pool_size; i++)
4753 pool_node *p = &pool_vector[i];
4755 switch (p->mode)
4757 case HImode:
4758 break;
4759 case SImode:
4760 case SFmode:
4761 if (need_align)
4763 need_align = false;
4764 scan = emit_label_after (gen_label_rtx (), scan);
4765 scan = emit_insn_after (gen_align_4 (), scan);
4767 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4768 scan = emit_label_after (lab, scan);
4769 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4770 scan);
4771 break;
4772 case DFmode:
4773 case DImode:
4774 if (need_align)
4776 need_align = false;
4777 scan = emit_label_after (gen_label_rtx (), scan);
4778 scan = emit_insn_after (gen_align_4 (), scan);
4780 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4781 scan = emit_label_after (lab, scan);
4782 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4783 scan);
4784 break;
4785 default:
4786 gcc_unreachable ();
4789 if (p->mode != HImode)
4791 for (ref = p->wend; ref; ref = ref->next)
4793 lab = ref->label;
4794 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4799 scan = emit_insn_after (gen_consttable_end (), scan);
4800 scan = emit_barrier_after (scan);
4801 pool_size = 0;
4802 pool_window_label = NULL_RTX;
4803 pool_window_last = 0;
4806 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4808 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4810 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4811 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4812 need to fix it if the input value is CONST_OK_FOR_I08. */
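/* For example, loading a full 32 bit constant such as 0x12345678 cannot
   be done with the 8 bit sign-extended immediate of mov #imm8,rn, so such
   a move is "broken" and has to be materialized as a pc-relative load
   from the constant pool.  */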
4813 static bool
4814 broken_move (rtx insn)
4816 if (NONJUMP_INSN_P (insn))
4818 rtx pat = PATTERN (insn);
4819 if (GET_CODE (pat) == PARALLEL)
4820 pat = XVECEXP (pat, 0, 0);
4821 if (GET_CODE (pat) == SET
4822 /* We can load any 8-bit value if we don't care what the high
4823 order bits end up as. */
4824 && GET_MODE (SET_DEST (pat)) != QImode
4825 && (CONSTANT_P (SET_SRC (pat))
4826 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4827 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4828 /* Match mova_const. */
4829 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4830 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4831 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4832 && ! (TARGET_SH2E
4833 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4834 && (fp_zero_operand (SET_SRC (pat))
4835 || fp_one_operand (SET_SRC (pat)))
4836 /* In general we don't know the current setting of fpscr, so
4837 disable fldi.
4838 There is an exception if this was a register-register move
4839 before reload - and hence it was ascertained that we have
4840 single precision setting - and in a post-reload optimization
4841 we changed this to do a constant load. In that case
4842 we don't have an r0 clobber, hence we must use fldi. */
4843 && (TARGET_FMOVD
4844 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4845 == SCRATCH))
4846 && REG_P (SET_DEST (pat))
4847 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4848 && ! (TARGET_SH2A
4849 && GET_MODE (SET_DEST (pat)) == SImode
4850 && (satisfies_constraint_I20 (SET_SRC (pat))
4851 || satisfies_constraint_I28 (SET_SRC (pat))))
4852 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4853 return true;
4856 return false;
4859 /* Return true if the specified insn is a mova insn. */
4860 static bool
4861 mova_p (rtx insn)
4863 return (NONJUMP_INSN_P (insn)
4864 && GET_CODE (PATTERN (insn)) == SET
4865 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4866 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4867 /* Don't match mova_const. */
4868 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4871 /* Fix up a mova from a switch that went out of range. */
4872 static void
4873 fixup_mova (rtx mova)
4875 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4876 if (! flag_pic)
4878 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4879 INSN_CODE (mova) = -1;
4881 else
4883 rtx worker = mova;
4884 rtx lab = gen_label_rtx ();
4885 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4889 worker = NEXT_INSN (worker);
4890 gcc_assert (worker
4891 && !LABEL_P (worker)
4892 && !JUMP_P (worker));
4893 } while (NOTE_P (worker)
4894 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4895 wpat = PATTERN (worker);
4896 wpat0 = XVECEXP (wpat, 0, 0);
4897 wpat1 = XVECEXP (wpat, 0, 1);
4898 wsrc = SET_SRC (wpat0);
4899 PATTERN (worker) = (gen_casesi_worker_2
4900 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4901 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4902 XEXP (wpat1, 0)));
4903 INSN_CODE (worker) = -1;
4904 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4905 base = gen_rtx_LABEL_REF (Pmode, lab);
4906 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4907 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4908 INSN_CODE (mova) = -1;
4912 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4913 *num_mova, and check that the new mova is not nested within the first one.
4914 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4915 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4916 static int
4917 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
4919 int n_addr = 0; /* Initialization to shut up spurious warning. */
4920 int f_target, n_target = 0; /* Likewise. */
4922 if (optimize)
4924 /* If NEW_MOVA has no address yet, it will be handled later. */
4925 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4926 return -1;
4928 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4929 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4930 if (n_addr > n_target || n_addr + 1022 < n_target)
4932 /* Change the mova into a load.
4933 broken_move will then return true for it. */
4934 fixup_mova (new_mova);
4935 return 1;
4938 if (!(*num_mova)++)
4940 *first_mova = new_mova;
4941 return 2;
4943 if (!optimize
4944 || ((f_target
4945 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4946 >= n_target))
4947 return -1;
4949 (*num_mova)--;
4950 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4951 > n_target - n_addr)
4953 fixup_mova (*first_mova);
4954 return 0;
4956 else
4958 fixup_mova (new_mova);
4959 return 1;
4963 /* Find the last barrier from insn FROM which is close enough to hold the
4964 constant pool. If we can't find one, then create one near the end of
4965 the range. */
4966 static rtx
4967 find_barrier (int num_mova, rtx mova, rtx from)
4969 int count_si = 0;
4970 int count_hi = 0;
4971 int found_hi = 0;
4972 int found_si = 0;
4973 int found_di = 0;
4974 int hi_align = 2;
4975 int si_align = 2;
4976 int leading_mova = num_mova;
4977 rtx barrier_before_mova = NULL_RTX;
4978 rtx found_barrier = NULL_RTX;
4979 rtx good_barrier = NULL_RTX;
4980 int si_limit;
4981 int hi_limit;
4982 rtx orig = from;
4983 rtx last_got = NULL_RTX;
4984 rtx last_symoff = NULL_RTX;
4986 /* For HImode: range is 510, add 4 because pc counts from address of
4987 second instruction after this one, subtract 2 for the jump instruction
4988 that we may need to emit before the table, subtract 2 for the instruction
4989 that fills the jump delay slot (in very rare cases, reorg will take an
4990 instruction from after the constant pool or will leave the delay slot
4991 empty). This gives 510.
4992 For SImode: range is 1020, add 4 because pc counts from address of
4993 second instruction after this one, subtract 2 in case pc is 2 byte
4994 aligned, subtract 2 for the jump instruction that we may need to emit
4995 before the table, subtract 2 for the instruction that fills the jump
4996 delay slot. This gives 1018. */
4998 /* The branch will always be shortened now that the reference address for
4999 forward branches is the successor address, thus we need no longer make
5000 adjustments to the [sh]i_limit for -O0. */
5002 si_limit = 1018;
5003 hi_limit = 510;
5005 while (from && count_si < si_limit && count_hi < hi_limit)
5007 int inc = get_attr_length (from);
5008 int new_align = 1;
5010 /* If this is a label that existed at the time of the compute_alignments
5011 call, determine the alignment. N.B. When find_barrier recurses for
5012 an out-of-reach mova, we might see labels at the start of previously
5013 inserted constant tables. */
5014 if (LABEL_P (from)
5015 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5017 if (optimize)
5018 new_align = 1 << label_to_alignment (from);
5019 else if (BARRIER_P (prev_nonnote_insn (from)))
5020 new_align = 1 << barrier_align (from);
5021 else
5022 new_align = 1;
5023 inc = 0;
5025 /* In case we are scanning a constant table because of recursion, check
5026 for explicit alignments. If the table is long, we might be forced
5027 to emit the new table in front of it; the length of the alignment
5028 might be the last straw. */
5029 else if (NONJUMP_INSN_P (from)
5030 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5031 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5032 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5033 /* When we find the end of a constant table, paste the new constant
5034 at the end. That is better than putting it in front because
5035 this way, we don't need extra alignment for adding a 4-byte-aligned
5036 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5037 else if (NONJUMP_INSN_P (from)
5038 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5039 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5040 return from;
5042 if (BARRIER_P (from))
5044 rtx next;
5046 found_barrier = from;
5048 /* If we are at the end of the function, or in front of an alignment
5049 instruction, we need not insert an extra alignment. We prefer
5050 this kind of barrier. */
5051 if (barrier_align (from) > 2)
5052 good_barrier = from;
5054 /* If we are at the end of a hot/cold block, dump the constants
5055 here. */
5056 next = NEXT_INSN (from);
5057 if (next
5058 && NOTE_P (next)
5059 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5060 break;
5063 if (broken_move (from))
5065 rtx pat, src, dst;
5066 enum machine_mode mode;
5068 pat = PATTERN (from);
5069 if (GET_CODE (pat) == PARALLEL)
5070 pat = XVECEXP (pat, 0, 0);
5071 src = SET_SRC (pat);
5072 dst = SET_DEST (pat);
5073 mode = GET_MODE (dst);
5075 /* A GOT pc-relative setting comes as a pair of
5076 mova .L8,r0
5077 mov.l .L8,r12
5078 instructions (plus an add r0,r12).
5079 Remember if we see one without the other. */
5080 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5081 last_got = last_got ? NULL_RTX : from;
5082 else if (PIC_ADDR_P (src))
5083 last_got = last_got ? NULL_RTX : from;
5085 /* We must explicitly check the mode, because sometimes the
5086 front end will generate code to load unsigned constants into
5087 HImode targets without properly sign extending them. */
5088 if (mode == HImode
5089 || (mode == SImode && satisfies_constraint_I16 (src)
5090 && REGNO (dst) != FPUL_REG))
5092 found_hi += 2;
5093 /* We put the short constants before the long constants, so
5094 we must count the length of short constants in the range
5095 for the long constants. */
5096 /* ??? This isn't optimal, but is easy to do. */
5097 si_limit -= 2;
5099 else
5101 /* We dump DF/DI constants before SF/SI ones, because
5102 the limit is the same, but the alignment requirements
5103 are higher. We may waste up to 4 additional bytes
5104 for alignment, and the DF/DI constant may have
5105 another SF/SI constant placed before it. */
5106 if (TARGET_SHCOMPACT
5107 && ! found_di
5108 && (mode == DFmode || mode == DImode))
5110 found_di = 1;
5111 si_limit -= 8;
5113 while (si_align > 2 && found_si + si_align - 2 > count_si)
5114 si_align >>= 1;
5115 if (found_si > count_si)
5116 count_si = found_si;
5117 found_si += GET_MODE_SIZE (mode);
5118 if (num_mova)
5119 si_limit -= GET_MODE_SIZE (mode);
5123 if (mova_p (from))
5125 switch (untangle_mova (&num_mova, &mova, from))
5127 case 1:
5128 if (flag_pic)
5130 rtx src = SET_SRC (PATTERN (from));
5131 if (GET_CODE (src) == CONST
5132 && GET_CODE (XEXP (src, 0)) == UNSPEC
5133 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5134 last_symoff = from;
5136 break;
5137 case 0: return find_barrier (0, 0, mova);
5138 case 2:
5140 leading_mova = 0;
5141 barrier_before_mova
5142 = good_barrier ? good_barrier : found_barrier;
5144 default: break;
5146 if (found_si > count_si)
5147 count_si = found_si;
5149 else if (JUMP_TABLE_DATA_P (from)
5150 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5152 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5153 || (num_mova
5154 && (prev_nonnote_insn (from)
5155 == XEXP (MOVA_LABELREF (mova), 0))))
5156 num_mova--;
5157 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5159 /* We have just passed the barrier in front of the
5160 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5161 the ADDR_DIFF_VEC is accessed as data, just like our pool
5162 constants, this is a good opportunity to accommodate what
5163 we have gathered so far.
5164 If we waited any longer, we could end up at a barrier in
5165 front of code, which gives worse cache usage for separated
5166 instruction / data caches. */
5167 good_barrier = found_barrier;
5168 break;
5170 else
5172 rtx body = PATTERN (from);
5173 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5176 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5177 else if (JUMP_P (from)
5178 && ! TARGET_SH2
5179 && ! optimize_size)
5180 new_align = 4;
5182 /* There is a possibility that a bf is transformed into a bf/s by the
5183 delay slot scheduler. */
5184 if (JUMP_P (from)
5185 && get_attr_type (from) == TYPE_CBRANCH
5186 && ! sequence_insn_p (from))
5187 inc += 2;
5189 if (found_si)
5191 count_si += inc;
5192 if (new_align > si_align)
5194 si_limit -= (count_si - 1) & (new_align - si_align);
5195 si_align = new_align;
5197 count_si = (count_si + new_align - 1) & -new_align;
5199 if (found_hi)
5201 count_hi += inc;
5202 if (new_align > hi_align)
5204 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5205 hi_align = new_align;
5207 count_hi = (count_hi + new_align - 1) & -new_align;
5209 from = NEXT_INSN (from);
5212 if (num_mova)
5214 if (leading_mova)
5216 /* Try as we might, the leading mova is out of range. Change
5217 it into a load (which will become a pcload) and retry. */
5218 fixup_mova (mova);
5219 return find_barrier (0, 0, mova);
5221 else
5223 /* Insert the constant pool table before the mova instruction,
5224 to prevent the mova label reference from going out of range. */
5225 from = mova;
5226 good_barrier = found_barrier = barrier_before_mova;
5230 if (found_barrier)
5232 if (good_barrier && next_real_insn (found_barrier))
5233 found_barrier = good_barrier;
5235 else
5237 /* We didn't find a barrier in time to dump our stuff,
5238 so we'll make one. */
5239 rtx label = gen_label_rtx ();
5241 /* Don't emit a constant table in the middle of insns for
5242 casesi_worker_2. This is a bit of overkill, but it is enough
5243 because casesi_worker_2 doesn't appear very frequently. */
5244 if (last_symoff)
5245 from = last_symoff;
5247 /* If we exceeded the range, then we must back up over the last
5248 instruction we looked at. Otherwise, we just need to undo the
5249 NEXT_INSN at the end of the loop. */
5250 if (PREV_INSN (from) != orig
5251 && (count_hi > hi_limit || count_si > si_limit))
5252 from = PREV_INSN (PREV_INSN (from));
5253 else
5254 from = PREV_INSN (from);
5256 /* Don't emit a constant table in the middle of global pointer setting,
5257 since that would move the addressing base GOT into another table.
5258 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5259 in the pool anyway, so just move up the whole constant pool.
5261 However, avoid doing so when the last single GOT mov is the starting
5262 insn itself. Going above the start insn would create a negative
5263 offset, causing errors. */
5264 if (last_got && last_got != orig)
5265 from = PREV_INSN (last_got);
5267 /* Don't insert the constant pool table at the position which
5268 may be the landing pad. */
5269 if (flag_exceptions
5270 && CALL_P (from)
5271 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5272 from = PREV_INSN (from);
5274 /* Walk back to be just before any jump or label.
5275 Putting it before a label reduces the number of times the branch
5276 around the constant pool table will be hit. Putting it before
5277 a jump makes it more likely that the bra delay slot will be
5278 filled. */
5279 while (NOTE_P (from) || JUMP_P (from)
5280 || LABEL_P (from))
5281 from = PREV_INSN (from);
5283 /* Make sure we do not split between a call and its corresponding
5284 CALL_ARG_LOCATION note. */
5285 if (CALL_P (from))
5287 rtx next = NEXT_INSN (from);
5288 if (next && NOTE_P (next)
5289 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5290 from = next;
5293 from = emit_jump_insn_after (gen_jump (label), from);
5294 JUMP_LABEL (from) = label;
5295 LABEL_NUSES (label) = 1;
5296 found_barrier = emit_barrier_after (from);
5297 emit_label_after (label, found_barrier);
5300 return found_barrier;
5303 /* If the instruction INSN is implemented by a special function, and we can
5304 positively find the register that is used to call the sfunc, and this
5305 register is not used anywhere else in this instruction - except as the
5306 destination of a set, return this register; else, return 0. */
5308 sfunc_uses_reg (rtx insn)
5310 int i;
5311 rtx pattern, part, reg_part, reg;
5313 if (!NONJUMP_INSN_P (insn))
5314 return NULL_RTX;
5315 pattern = PATTERN (insn);
5316 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5317 return NULL_RTX;
5319 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5321 part = XVECEXP (pattern, 0, i);
5322 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5323 reg_part = part;
5325 if (! reg_part)
5326 return NULL_RTX;
5327 reg = XEXP (reg_part, 0);
5328 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5330 part = XVECEXP (pattern, 0, i);
5331 if (part == reg_part || GET_CODE (part) == CLOBBER)
5332 continue;
5333 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5334 && REG_P (SET_DEST (part)))
5335 ? SET_SRC (part) : part)))
5336 return NULL_RTX;
5338 return reg;
5341 /* See if the only way in which INSN uses REG is by calling it, or by
5342 setting it while calling it. Set *SET to a SET rtx if the register
5343 is set by INSN. */
5344 static bool
5345 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
5347 rtx pattern, reg2;
5349 *set = NULL_RTX;
5351 reg2 = sfunc_uses_reg (insn);
5352 if (reg2 && REGNO (reg2) == REGNO (reg))
5354 pattern = single_set (insn);
5355 if (pattern
5356 && REG_P (SET_DEST (pattern))
5357 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5358 *set = pattern;
5359 return false;
5361 if (!CALL_P (insn))
5363 /* We don't use rtx_equal_p because we don't care if the mode is
5364 different. */
5365 pattern = single_set (insn);
5366 if (pattern
5367 && REG_P (SET_DEST (pattern))
5368 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5370 rtx par, part;
5371 int i;
5373 *set = pattern;
5374 par = PATTERN (insn);
5375 if (GET_CODE (par) == PARALLEL)
5376 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5378 part = XVECEXP (par, 0, i);
5379 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5380 return true;
5382 return reg_mentioned_p (reg, SET_SRC (pattern));
5385 return true;
5388 pattern = PATTERN (insn);
5390 if (GET_CODE (pattern) == PARALLEL)
5392 int i;
5394 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5395 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5396 return true;
5397 pattern = XVECEXP (pattern, 0, 0);
5400 if (GET_CODE (pattern) == SET)
5402 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5404 /* We don't use rtx_equal_p, because we don't care if the
5405 mode is different. */
5406 if (!REG_P (SET_DEST (pattern))
5407 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5408 return true;
5410 *set = pattern;
5413 pattern = SET_SRC (pattern);
5416 if (GET_CODE (pattern) != CALL
5417 || !MEM_P (XEXP (pattern, 0))
5418 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5419 return true;
5421 return false;
5424 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5425 general registers. Bits 0..15 mean that the respective registers
5426 are used as inputs in the instruction. Bits 16..31 mean that the
5427 registers 0..15, respectively, are used as outputs, or are clobbered.
5428 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
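/* For example (assuming SImode hard registers, one word each):
   (set (reg:SI r1) (plus:SI (reg:SI r2) (reg:SI r3))) yields
   (1 << 2) | (1 << 3) for the inputs and (1 << (1 + 16)) for the
   output, i.e. 0x2000c.  */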
5430 regs_used (rtx x, int is_dest)
5432 enum rtx_code code;
5433 const char *fmt;
5434 int i, used = 0;
5436 if (! x)
5437 return used;
5438 code = GET_CODE (x);
5439 switch (code)
5441 case REG:
5442 if (REGNO (x) < 16)
5443 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5444 << (REGNO (x) + is_dest));
5445 return 0;
5446 case SUBREG:
5448 rtx y = SUBREG_REG (x);
5450 if (!REG_P (y))
5451 break;
5452 if (REGNO (y) < 16)
5453 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5454 << (REGNO (y) +
5455 subreg_regno_offset (REGNO (y),
5456 GET_MODE (y),
5457 SUBREG_BYTE (x),
5458 GET_MODE (x)) + is_dest));
5459 return 0;
5461 case SET:
5462 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5463 case RETURN:
5464 /* If there was a return value, it must have been indicated with USE. */
5465 return 0x00ffff00;
5466 case CLOBBER:
5467 is_dest = 1;
5468 break;
5469 case MEM:
5470 is_dest = 0;
5471 break;
5472 case CALL:
5473 used |= 0x00ff00f0;
5474 break;
5475 default:
5476 break;
5479 fmt = GET_RTX_FORMAT (code);
5481 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5483 if (fmt[i] == 'E')
5485 int j;
5486 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5487 used |= regs_used (XVECEXP (x, i, j), is_dest);
5489 else if (fmt[i] == 'e')
5490 used |= regs_used (XEXP (x, i), is_dest);
5492 return used;
5495 /* Create an instruction that prevents redirection of a conditional branch
5496 to the destination of the JUMP with address ADDR.
5497 If the branch needs to be implemented as an indirect jump, try to find
5498 a scratch register for it.
5499 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5500 If any preceding insn that doesn't fit into a delay slot is good enough,
5501 pass 1. Pass 2 if a definite blocking insn is needed.
5502 -1 is used internally to avoid deep recursion.
5503 If a blocking instruction is made or recognized, return it. */
5504 static rtx
5505 gen_block_redirect (rtx jump, int addr, int need_block)
5507 int dead = 0;
5508 rtx prev = prev_nonnote_insn (jump);
5509 rtx dest;
5511 /* First, check if we already have an instruction that satisfies our need. */
5512 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5514 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5515 return prev;
5516 if (GET_CODE (PATTERN (prev)) == USE
5517 || GET_CODE (PATTERN (prev)) == CLOBBER
5518 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5519 prev = jump;
5520 else if ((need_block &= ~1) < 0)
5521 return prev;
5522 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5523 need_block = 0;
5525 if (GET_CODE (PATTERN (jump)) == RETURN)
5527 if (! need_block)
5528 return prev;
5529 /* Reorg even does nasty things with return insns that cause branches
5530 to go out of range - see find_end_label and callers. */
5531 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5533 /* We can't use JUMP_LABEL here because it might be undefined
5534 when not optimizing. */
5535 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5536 /* If the branch is out of range, try to find a scratch register for it. */
5537 if (optimize
5538 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5539 > 4092 + 4098))
5541 rtx scan;
5542 /* Don't look for the stack pointer as a scratch register;
5543 it would cause trouble if an interrupt occurred. */
5544 unsigned attempt = 0x7fff, used;
5545 int jump_left = flag_expensive_optimizations + 1;
5547 /* It is likely that the most recent eligible instruction is wanted for
5548 the delay slot. Therefore, find out which registers it uses, and
5549 try to avoid using them. */
5551 for (scan = jump; (scan = PREV_INSN (scan)); )
5553 enum rtx_code code;
5555 if (INSN_DELETED_P (scan))
5556 continue;
5557 code = GET_CODE (scan);
5558 if (code == CODE_LABEL || code == JUMP_INSN)
5559 break;
5560 if (code == INSN
5561 && GET_CODE (PATTERN (scan)) != USE
5562 && GET_CODE (PATTERN (scan)) != CLOBBER
5563 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5565 attempt &= ~regs_used (PATTERN (scan), 0);
5566 break;
5569 for (used = dead = 0, scan = JUMP_LABEL (jump);
5570 (scan = NEXT_INSN (scan)); )
5572 enum rtx_code code;
5574 if (INSN_DELETED_P (scan))
5575 continue;
5576 code = GET_CODE (scan);
5577 if (INSN_P (scan))
5579 used |= regs_used (PATTERN (scan), 0);
5580 if (code == CALL_INSN)
5581 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5582 dead |= (used >> 16) & ~used;
5583 if (dead & attempt)
5585 dead &= attempt;
5586 break;
5588 if (code == JUMP_INSN)
5590 if (jump_left-- && simplejump_p (scan))
5591 scan = JUMP_LABEL (scan);
5592 else
5593 break;
5597 /* Mask out the stack pointer again, in case it was
5598 the only 'free' register we have found. */
5599 dead &= 0x7fff;
5601 /* If the immediate destination is still in range, check for possible
5602 threading with a jump beyond the delay slot insn.
5603 Don't check if we are called recursively; the jump has been or will be
5604 checked in a different invocation then. */
5606 else if (optimize && need_block >= 0)
5608 rtx next = next_active_insn (next_active_insn (dest));
5609 if (next && JUMP_P (next)
5610 && GET_CODE (PATTERN (next)) == SET
5611 && recog_memoized (next) == CODE_FOR_jump_compact)
5613 dest = JUMP_LABEL (next);
5614 if (dest
5615 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5616 > 4092 + 4098))
5617 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5621 if (dead)
5623 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5625 /* It would be nice if we could convert the jump into an indirect
5626 jump / far branch right now, thus exposing all constituent
5627 instructions to further optimization. However, reorg uses
5628 simplejump_p to determine if there is an unconditional jump where
5629 it should try to schedule instructions from the target of the
5630 branch; simplejump_p fails for indirect jumps even if they have
5631 a JUMP_LABEL. */
5632 rtx insn = emit_insn_before (gen_indirect_jump_scratch
5633 (reg, GEN_INT (unspec_bbr_uid++)),
5634 jump);
5635 /* ??? We would like this to have the scope of the jump, but that
5636 scope will change when a delay slot insn of an inner scope is added.
5637 Hence, after delay slot scheduling, we'll have to expect
5638 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5639 the jump. */
5641 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5642 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5643 return insn;
5645 else if (need_block)
5646 /* We can't use JUMP_LABEL here because it might be undefined
5647 when not optimizing. */
5648 return emit_insn_before (gen_block_branch_redirect
5649 (GEN_INT (unspec_bbr_uid++)),
5650 jump);
5651 return prev;
5654 #define CONDJUMP_MIN -252
5655 #define CONDJUMP_MAX 262
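/* These bounds appear to follow from the 8-bit signed displacement of SH
   conditional branches, which is scaled by 2 and applied 4 bytes past the
   branch: measured from the branch itself that gives roughly -252 .. +258
   bytes, with a little extra slack folded into CONDJUMP_MAX.  */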
5656 struct far_branch
5658 /* A label (to be placed) in front of the jump
5659 that jumps to our ultimate destination. */
5660 rtx near_label;
5661 /* Where we are going to insert it if we cannot move the jump any farther,
5662 or the jump itself if we have picked up an existing jump. */
5663 rtx insert_place;
5664 /* The ultimate destination. */
5665 rtx far_label;
5666 struct far_branch *prev;
5667 /* If the branch has already been created, its address;
5668 else the address of its first prospective user. */
5669 int address;
5672 static void gen_far_branch (struct far_branch *);
5673 enum mdep_reorg_phase_e mdep_reorg_phase;
5674 static void
5675 gen_far_branch (struct far_branch *bp)
5677 rtx insn = bp->insert_place;
5678 rtx jump;
5679 rtx label = gen_label_rtx ();
5680 int ok;
5682 emit_label_after (label, insn);
5683 if (bp->far_label)
5685 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5686 LABEL_NUSES (bp->far_label)++;
5688 else
5689 jump = emit_jump_insn_after (gen_return (), insn);
5691 /* Emit a barrier so that reorg knows that any following instructions
5692 are not reachable via a fall-through path.
5693 But don't do this when not optimizing, since we wouldn't suppress the
5694 alignment for the barrier then, and could end up with out-of-range
5695 pc-relative loads. */
5696 if (optimize)
5697 emit_barrier_after (jump);
5698 emit_label_after (bp->near_label, insn);
5700 if (bp->far_label)
5701 JUMP_LABEL (jump) = bp->far_label;
5702 else
5704 rtx pat = PATTERN (jump);
5705 gcc_assert (ANY_RETURN_P (pat));
5706 JUMP_LABEL (jump) = pat;
5709 ok = invert_jump (insn, label, 1);
5710 gcc_assert (ok);
5712 /* If we are branching around a jump (rather than a return), prevent
5713 reorg from using an insn from the jump target as the delay slot insn -
5714 when reorg did this, it pessimized code (we would rather hide the delay slot)
5715 and it could cause branches to go out of range. */
5716 if (bp->far_label)
5717 (emit_insn_after
5718 (gen_stuff_delay_slot
5719 (GEN_INT (unspec_bbr_uid++),
5720 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5721 insn));
5722 /* Prevent reorg from undoing our splits. */
5723 gen_block_redirect (jump, bp->address += 2, 2);
5726 /* Fix up ADDR_DIFF_VECs. */
5727 void
5728 fixup_addr_diff_vecs (rtx first)
5730 rtx insn;
5732 for (insn = first; insn; insn = NEXT_INSN (insn))
5734 rtx vec_lab, pat, prev, prevpat, x, braf_label;
5736 if (! JUMP_TABLE_DATA_P (insn)
5737 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5738 continue;
5739 pat = PATTERN (insn);
5740 vec_lab = XEXP (XEXP (pat, 0), 0);
5742 /* Search the matching casesi_jump_2. */
5743 for (prev = vec_lab; ; prev = PREV_INSN (prev))
5745 if (!JUMP_P (prev))
5746 continue;
5747 prevpat = PATTERN (prev);
5748 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5749 continue;
5750 x = XVECEXP (prevpat, 0, 1);
5751 if (GET_CODE (x) != USE)
5752 continue;
5753 x = XEXP (x, 0);
5754 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5755 break;
5757 /* FIXME: This is a bug in the optimizer, but it seems harmless
5758 to just avoid panicking. */
5759 if (!prev)
5760 continue;
5762 /* Emit the reference label of the braf where it belongs, right after
5763 the casesi_jump_2 (i.e. braf). */
5764 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5765 emit_label_after (braf_label, prev);
5767 /* Fix up the ADDR_DIFF_VEC to be relative
5768 to the reference address of the braf. */
5769 XEXP (XEXP (pat, 0), 0) = braf_label;
5773 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5774 a barrier. Return the base 2 logarithm of the desired alignment. */
5776 barrier_align (rtx barrier_or_label)
5778 rtx next, pat;
5780 if (! barrier_or_label)
5781 return 0;
5783 if (LABEL_P (barrier_or_label)
5784 && NEXT_INSN (barrier_or_label)
5785 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5786 return 2;
5788 if (BARRIER_P (barrier_or_label)
5789 && PREV_INSN (barrier_or_label)
5790 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5792 pat = PATTERN (PREV_INSN (barrier_or_label));
5793 /* If this is a very small table, we want to keep the alignment after
5794 the table to the minimum for proper code alignment. */
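/* As a rough worked example, assuming CACHE_LOG were 5 (a 32-byte cache
   line), the size limit in the test below is 1 << 3 == 8 bytes, so e.g. a
   four entry HImode ADDR_DIFF_VEC (4 * 2 bytes) would still get only the
   minimal alignment after it.  */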
5795 return ((optimize_size
5796 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5797 <= (unsigned) 1 << (CACHE_LOG - 2)))
5798 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5801 next = next_active_insn (barrier_or_label);
5803 if (! next)
5804 return 0;
5806 pat = PATTERN (next);
5808 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5809 /* This is a barrier in front of a constant table. */
5810 return 0;
5812 if (optimize_size)
5813 return 0;
5815 if (! TARGET_SH2 || ! optimize)
5816 return align_jumps_log;
5818 /* When fixing up pcloads, a constant table might be inserted just before
5819 the basic block that ends with the barrier. Thus, we can't trust the
5820 instruction lengths before that. */
5821 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5823 /* Check if there is an immediately preceding branch to the insn beyond
5824 the barrier. We must weigh the cost of discarding useful information
5825 from the current cache line when executing this branch and there is
5826 an alignment, against that of fetching unneeded insns in front of the
5827 branch target when there is no alignment. */
5829 /* There are two delay_slot cases to consider. One is the simple case
5830 where the preceding branch is to the insn beyond the barrier (simple
5831 delay slot filling), and the other is where the preceding branch has
5832 a delay slot that is a duplicate of the insn after the barrier
5833 (fill_eager_delay_slots) and the branch is to the insn after the insn
5834 after the barrier. */
5836 int slot, credit;
5837 bool jump_to_next = false;
5839 /* Skip to the insn before the JUMP_INSN before the barrier under
5840 investigation. */
5841 rtx prev = prev_real_insn (prev_active_insn (barrier_or_label));
5843 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5844 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5845 prev = prev_real_insn (prev))
5847 jump_to_next = false;
5848 if (GET_CODE (PATTERN (prev)) == USE
5849 || GET_CODE (PATTERN (prev)) == CLOBBER)
5850 continue;
5851 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
5853 prev = XVECEXP (PATTERN (prev), 0, 1);
5854 if (INSN_UID (prev) == INSN_UID (next))
5856 /* Delay slot was filled with insn at jump target. */
5857 jump_to_next = true;
5858 continue;
5862 if (slot
5863 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5864 slot = 0;
5865 credit -= get_attr_length (prev);
5867 if (prev && jump_to_label_p (prev))
5869 rtx x;
5870 if (jump_to_next
5871 || next_real_insn (JUMP_LABEL (prev)) == next
5872 /* If relax_delay_slots() decides NEXT was redundant
5873 with some previous instruction, it will have
5874 redirected PREV's jump to the following insn. */
5875 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5876 /* There is no upper bound on redundant instructions
5877 that might have been skipped, but we must not put an
5878 alignment where none had been before. */
5879 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5880 (INSN_P (x)
5881 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5882 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5883 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5885 rtx pat = PATTERN (prev);
5886 if (GET_CODE (pat) == PARALLEL)
5887 pat = XVECEXP (pat, 0, 0);
5888 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5889 return 0;
5894 return align_jumps_log;
5897 /* If we are inside a phony loop, almost any kind of label can turn up as the
5898 first one in the loop. Aligning a braf label causes incorrect switch
5899 destination addresses; we can detect braf labels because they are
5900 followed by a BARRIER.
5901 Applying loop alignment to small constant or switch tables is a waste
5902 of space, so we suppress this too. */
5904 sh_loop_align (rtx label)
5906 rtx next = label;
5908 if (! optimize || optimize_size)
5909 return 0;
5912 next = next_nonnote_insn (next);
5913 while (next && LABEL_P (next));
5915 if (! next
5916 || ! INSN_P (next)
5917 || recog_memoized (next) == CODE_FOR_consttable_2)
5918 return 0;
5920 return align_loops_log;
5923 /* Do a final pass over the function, just before delayed branch
5924 scheduling. */
5925 static void
5926 sh_reorg (void)
5928 rtx first, insn, mova = NULL_RTX;
5929 int num_mova;
5930 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5931 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5933 first = get_insns ();
5934 max_labelno_before_reorg = max_label_num ();
5936 /* We must split call insns before introducing `mova's. If we're
5937 optimizing, they'll have already been split. Otherwise, make
5938 sure we don't split them too late. */
5939 if (! optimize)
5940 split_all_insns_noflow ();
5942 if (TARGET_SHMEDIA)
5943 return;
5945 /* If relaxing, generate pseudo-ops to associate function calls with
5946 the symbols they call. It does no harm not to generate these
5947 pseudo-ops. However, when we can generate them, it enables the
5948 linker to potentially relax the jsr to a bsr, and eliminate the
5949 register load and, possibly, the constant pool entry. */
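/* Concretely, the effect on the assembly output (register and label number
   purely illustrative) is roughly

     .L42:  mov.l  .Lconst,r1    ! load of the call target, labeled
            ...
            .uses  .L42
            jsr    @r1           ! call annotated with the .uses pseudo-op

   see final_prescan_insn, which emits the label and the .uses directive from
   the REG_LABEL_OPERAND notes attached below.  */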
5951 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5952 if (TARGET_RELAX)
5954 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5955 own purposes. This works because none of the remaining passes
5956 need to look at them.
5958 ??? But it may break in the future. We should use a machine
5959 dependent REG_NOTE, or some other approach entirely. */
5960 for (insn = first; insn; insn = NEXT_INSN (insn))
5962 if (INSN_P (insn))
5964 rtx note;
5966 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5967 NULL_RTX)) != 0)
5968 remove_note (insn, note);
5972 for (insn = first; insn; insn = NEXT_INSN (insn))
5974 rtx pattern, reg, link, set, scan, dies, label;
5975 int rescan = 0, foundinsn = 0;
5977 if (CALL_P (insn))
5979 pattern = PATTERN (insn);
5981 if (GET_CODE (pattern) == PARALLEL)
5982 pattern = XVECEXP (pattern, 0, 0);
5983 if (GET_CODE (pattern) == SET)
5984 pattern = SET_SRC (pattern);
5986 if (GET_CODE (pattern) != CALL
5987 || !MEM_P (XEXP (pattern, 0)))
5988 continue;
5990 reg = XEXP (XEXP (pattern, 0), 0);
5992 else
5994 reg = sfunc_uses_reg (insn);
5995 if (! reg)
5996 continue;
5999 if (!REG_P (reg))
6000 continue;
6002 /* Try scanning backward to find where the register is set. */
6003 link = NULL;
6004 for (scan = PREV_INSN (insn);
6005 scan && !LABEL_P (scan);
6006 scan = PREV_INSN (scan))
6008 if (! INSN_P (scan))
6009 continue;
6011 if (! reg_mentioned_p (reg, scan))
6012 continue;
6014 if (noncall_uses_reg (reg, scan, &set))
6015 break;
6017 if (set)
6019 link = scan;
6020 break;
6024 if (! link)
6025 continue;
6027 /* The register is set at LINK. */
6029 /* We can only optimize the function call if the register is
6030 being set to a symbol. In theory, we could sometimes
6031 optimize calls to a constant location, but the assembler
6032 and linker do not support that at present. */
6033 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6034 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6035 continue;
6037 /* Scan forward from LINK to the place where REG dies, and
6038 make sure that the only insns which use REG are
6039 themselves function calls. */
6041 /* ??? This doesn't work for call targets that were allocated
6042 by reload, since there may not be a REG_DEAD note for the
6043 register. */
6045 dies = NULL_RTX;
6046 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6048 rtx scanset;
6050 /* Don't try to trace forward past a CODE_LABEL if we haven't
6051 seen INSN yet. Ordinarily, we will only find the setting insn
6052 if it is in the same basic block. However,
6053 cross-jumping can insert code labels in between the load and
6054 the call, and can result in situations where a single call
6055 insn may have two targets depending on where we came from. */
6057 if (LABEL_P (scan) && ! foundinsn)
6058 break;
6060 if (! INSN_P (scan))
6061 continue;
6063 /* Don't try to trace forward past a JUMP. To optimize
6064 safely, we would have to check that all the
6065 instructions at the jump destination did not use REG. */
6067 if (JUMP_P (scan))
6068 break;
6070 if (! reg_mentioned_p (reg, scan))
6071 continue;
6073 if (noncall_uses_reg (reg, scan, &scanset))
6074 break;
6076 if (scan == insn)
6077 foundinsn = 1;
6079 if (scan != insn
6080 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6082 /* There is a function call to this register other
6083 than the one we are checking. If we optimize
6084 this call, we need to rescan again below. */
6085 rescan = 1;
6088 /* ??? We shouldn't have to worry about SCANSET here.
6089 We should just be able to check for a REG_DEAD note
6090 on a function call. However, the REG_DEAD notes are
6091 apparently not dependable around libcalls; c-torture
6092 execute/920501-2 is a test case. If SCANSET is set,
6093 then this insn sets the register, so it must have
6094 died earlier. Unfortunately, this will only handle
6095 the cases in which the register is, in fact, set in a
6096 later insn. */
6098 /* ??? We shouldn't have to use FOUNDINSN here.
6099 This dates back to when we used LOG_LINKS to find
6100 the most recent insn which sets the register. */
6102 if (foundinsn
6103 && (scanset
6104 || find_reg_note (scan, REG_DEAD, reg)))
6106 dies = scan;
6107 break;
6111 if (! dies)
6113 /* Either there was a branch, or some insn used REG
6114 other than as a function call address. */
6115 continue;
6118 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6119 on the insn which sets the register, and on each call insn
6120 which uses the register. In final_prescan_insn we look for
6121 the REG_LABEL_OPERAND notes, and output the appropriate label
6122 or pseudo-op. */
6124 label = gen_label_rtx ();
6125 add_reg_note (link, REG_LABEL_OPERAND, label);
6126 add_reg_note (insn, REG_LABEL_OPERAND, label);
6127 if (rescan)
6129 scan = link;
6132 rtx reg2;
6134 scan = NEXT_INSN (scan);
6135 if (scan != insn
6136 && ((CALL_P (scan)
6137 && reg_mentioned_p (reg, scan))
6138 || ((reg2 = sfunc_uses_reg (scan))
6139 && REGNO (reg2) == REGNO (reg))))
6140 add_reg_note (scan, REG_LABEL_OPERAND, label);
6142 while (scan != dies);
6147 if (TARGET_SH2)
6148 fixup_addr_diff_vecs (first);
6150 if (optimize)
6152 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6153 shorten_branches (first);
6156 /* Scan the function looking for move instructions which have to be
6157 changed to pc-relative loads and insert the literal tables. */
6158 label_ref_list_pool = create_alloc_pool ("label references list",
6159 sizeof (struct label_ref_list_d),
6160 30);
6161 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6162 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6164 if (mova_p (insn))
6166 /* ??? basic block reordering can move a switch table dispatch
6167 below the switch table. Check if that has happened.
6168 We only have the addresses available when optimizing; but then,
6169 this check shouldn't be needed when not optimizing. */
6170 if (!untangle_mova (&num_mova, &mova, insn))
6172 insn = mova;
6173 num_mova = 0;
6176 else if (JUMP_TABLE_DATA_P (insn)
6177 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6178 && num_mova
6179 /* ??? loop invariant motion can also move a mova out of a
6180 loop. Since loop does this code motion anyway, maybe we
6181 should wrap UNSPEC_MOVA into a CONST, so that reload can
6182 move it back. */
6183 && ((num_mova > 1
6184 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6185 || (prev_nonnote_insn (insn)
6186 == XEXP (MOVA_LABELREF (mova), 0))))
6188 rtx scan;
6189 int total;
6191 num_mova--;
6193 /* Some code might have been inserted between the mova and
6194 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6195 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6196 total += get_attr_length (scan);
6198 /* The range of mova is 1020; add 4 because pc counts from the address of
6199 the second instruction after this one, and subtract 2 in case pc is 2
6200 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6201 cancels out with alignment effects of the mova itself. */
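/* Putting those numbers together: 1020 + 4 - 2 == 1022, which is exactly
   the limit tested below; a TOTAL beyond that means the ADDR_DIFF_VEC is no
   longer reachable from the mova and the mova has to be fixed up.  */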
6202 if (total > 1022)
6204 /* Change the mova into a load, and restart scanning
6205 there. broken_move will then return true for mova. */
6206 fixup_mova (mova);
6207 insn = mova;
6210 if (broken_move (insn)
6211 || (NONJUMP_INSN_P (insn)
6212 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6214 rtx scan;
6215 /* Scan ahead looking for a barrier to stick the constant table
6216 behind. */
6217 rtx barrier = find_barrier (num_mova, mova, insn);
6218 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
6219 int need_aligned_label = 0;
6221 if (num_mova && ! mova_p (mova))
6223 /* find_barrier had to change the first mova into a
6224 pcload; thus, we have to start with this new pcload. */
6225 insn = mova;
6226 num_mova = 0;
6228 /* Now find all the moves between the points and modify them. */
6229 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6231 if (LABEL_P (scan))
6232 last_float = 0;
6233 if (NONJUMP_INSN_P (scan)
6234 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6235 need_aligned_label = 1;
6236 if (broken_move (scan))
6238 rtx *patp = &PATTERN (scan), pat = *patp;
6239 rtx src, dst;
6240 rtx lab;
6241 rtx newsrc;
6242 enum machine_mode mode;
6244 if (GET_CODE (pat) == PARALLEL)
6245 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6246 src = SET_SRC (pat);
6247 dst = SET_DEST (pat);
6248 mode = GET_MODE (dst);
6250 if (mode == SImode && satisfies_constraint_I16 (src)
6251 && REGNO (dst) != FPUL_REG)
6253 int offset = 0;
6255 mode = HImode;
6256 while (GET_CODE (dst) == SUBREG)
6258 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6259 GET_MODE (SUBREG_REG (dst)),
6260 SUBREG_BYTE (dst),
6261 GET_MODE (dst));
6262 dst = SUBREG_REG (dst);
6264 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6266 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6268 /* This must be an insn that clobbers r0. */
6269 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6270 XVECLEN (PATTERN (scan), 0)
6271 - 1);
6272 rtx clobber = *clobberp;
6274 gcc_assert (GET_CODE (clobber) == CLOBBER
6275 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6277 if (last_float
6278 && reg_set_between_p (r0_rtx, last_float_move, scan))
6279 last_float = 0;
6280 if (last_float
6281 && TARGET_SHCOMPACT
6282 && GET_MODE_SIZE (mode) != 4
6283 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6284 last_float = 0;
6285 lab = add_constant (src, mode, last_float);
6286 if (lab)
6287 emit_insn_before (gen_mova (lab), scan);
6288 else
6290 /* There will be a REG_UNUSED note for r0 on
6291 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6292 otherwise reorg:mark_target_live_regs will not
6293 consider r0 to be used, and we would end up with a
6294 delay slot insn in front of SCAN that clobbers r0. */
6295 rtx note
6296 = find_regno_note (last_float_move, REG_UNUSED, 0);
6298 /* If we are not optimizing, then there may not be
6299 a note. */
6300 if (note)
6301 PUT_REG_NOTE_KIND (note, REG_INC);
6303 *last_float_addr = r0_inc_rtx;
6305 last_float_move = scan;
6306 last_float = src;
6307 newsrc = gen_const_mem (mode,
6308 (((TARGET_SH4 && ! TARGET_FMOVD)
6309 || REGNO (dst) == FPUL_REG)
6310 ? r0_inc_rtx
6311 : r0_rtx));
6312 last_float_addr = &XEXP (newsrc, 0);
6314 /* Remove the clobber of r0. */
6315 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6316 gen_rtx_SCRATCH (Pmode));
6318 /* This is a mova needing a label. Create it. */
6319 else if (GET_CODE (src) == UNSPEC
6320 && XINT (src, 1) == UNSPEC_MOVA
6321 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6323 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6324 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6325 newsrc = gen_rtx_UNSPEC (SImode,
6326 gen_rtvec (1, newsrc),
6327 UNSPEC_MOVA);
6329 else if (GET_CODE (src) == UNSPEC_VOLATILE
6330 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6332 newsrc = XVECEXP (src, 0, 0);
6333 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6334 INSN_CODE (scan) = -1;
6335 continue;
6337 else
6339 lab = add_constant (src, mode, 0);
6340 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6341 newsrc = gen_const_mem (mode, newsrc);
6343 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6344 INSN_CODE (scan) = -1;
6347 dump_table (need_aligned_label ? insn : 0, barrier);
6348 insn = barrier;
6351 free_alloc_pool (label_ref_list_pool);
6352 for (insn = first; insn; insn = NEXT_INSN (insn))
6353 PUT_MODE (insn, VOIDmode);
6355 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6356 INSN_ADDRESSES_FREE ();
6357 split_branches (first);
6359 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6360 also has an effect on the register that holds the address of the sfunc.
6361 Insert an extra dummy insn in front of each sfunc that pretends to
6362 use this register. */
6363 if (flag_delayed_branch)
6365 for (insn = first; insn; insn = NEXT_INSN (insn))
6367 rtx reg = sfunc_uses_reg (insn);
6369 if (! reg)
6370 continue;
6371 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6374 #if 0
6375 /* fpscr is not actually a user variable, but we pretend it is for the
6376 sake of the previous optimization passes, since we want it handled like
6377 one. However, we don't have any debugging information for it, so turn
6378 it into a non-user variable now. */
6379 if (TARGET_SH4)
6380 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6381 #endif
6382 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6385 /* Return the UID of the insn that follows the specified label. */
6387 get_dest_uid (rtx label, int max_uid)
6389 rtx dest = next_real_insn (label);
6390 int dest_uid;
6391 if (! dest)
6392 /* This can happen for an undefined label. */
6393 return 0;
6394 dest_uid = INSN_UID (dest);
6395 /* If this is a newly created branch redirection blocking instruction,
6396 we cannot index the branch_uid or insn_addresses arrays with its
6397 uid. But then, we won't need to, because the actual destination is
6398 the following branch. */
6399 while (dest_uid >= max_uid)
6401 dest = NEXT_INSN (dest);
6402 dest_uid = INSN_UID (dest);
6404 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6405 return 0;
6406 return dest_uid;
6409 /* Split condbranches that are out of range. Also add clobbers for
6410 scratch registers that are needed in far jumps.
6411 We do this before delay slot scheduling, so that it can take our
6412 newly created instructions into account. It also allows us to
6413 find branches with common targets more easily. */
6414 static void
6415 split_branches (rtx first)
6417 rtx insn;
6418 struct far_branch **uid_branch, *far_branch_list = 0;
6419 int max_uid = get_max_uid ();
6420 int ok;
6422 /* Find out which branches are out of range. */
6423 shorten_branches (first);
6425 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6426 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6428 for (insn = first; insn; insn = NEXT_INSN (insn))
6429 if (! INSN_P (insn))
6430 continue;
6431 else if (INSN_DELETED_P (insn))
6433 /* Shorten_branches would split this instruction again,
6434 so transform it into a note. */
6435 SET_INSN_DELETED (insn);
6437 else if (JUMP_P (insn))
6439 enum attr_type type = get_attr_type (insn);
6440 if (type == TYPE_CBRANCH)
6442 rtx next, beyond;
6444 if (get_attr_length (insn) > 4)
6446 rtx src = SET_SRC (PATTERN (insn));
6447 rtx olabel = XEXP (XEXP (src, 1), 0);
6448 int addr = INSN_ADDRESSES (INSN_UID (insn));
6449 rtx label = 0;
6450 int dest_uid = get_dest_uid (olabel, max_uid);
6451 struct far_branch *bp = uid_branch[dest_uid];
6453 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6454 the label if the LABEL_NUSES count drops to zero. There is
6455 always a jump_optimize pass that sets these values, but it
6456 proceeds to delete unreferenced code, and then if not
6457 optimizing, to un-delete the deleted instructions, thus
6458 leaving labels with use counts that are too low. */
6459 if (! optimize)
6461 JUMP_LABEL (insn) = olabel;
6462 LABEL_NUSES (olabel)++;
6464 if (! bp)
6466 bp = (struct far_branch *) alloca (sizeof *bp);
6467 uid_branch[dest_uid] = bp;
6468 bp->prev = far_branch_list;
6469 far_branch_list = bp;
6470 bp->far_label
6471 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6472 LABEL_NUSES (bp->far_label)++;
6474 else
6476 label = bp->near_label;
6477 if (! label && bp->address - addr >= CONDJUMP_MIN)
6479 rtx block = bp->insert_place;
6481 if (GET_CODE (PATTERN (block)) == RETURN)
6482 block = PREV_INSN (block);
6483 else
6484 block = gen_block_redirect (block,
6485 bp->address, 2);
6486 label = emit_label_after (gen_label_rtx (),
6487 PREV_INSN (block));
6488 bp->near_label = label;
6490 else if (label && ! NEXT_INSN (label))
6492 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6493 bp->insert_place = insn;
6494 else
6495 gen_far_branch (bp);
6498 if (! label
6499 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6501 bp->near_label = label = gen_label_rtx ();
6502 bp->insert_place = insn;
6503 bp->address = addr;
6505 ok = redirect_jump (insn, label, 0);
6506 gcc_assert (ok);
6508 else
6510 /* get_attr_length (insn) == 2 */
6511 /* Check if we have a pattern where reorg wants to redirect
6512 the branch to a label from an unconditional branch that
6513 is too far away. */
6514 /* We can't use JUMP_LABEL here because it might be undefined
6515 when not optimizing. */
6516 /* A syntax error might cause beyond to be NULL_RTX. */
6517 beyond
6518 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6519 0));
6521 if (beyond
6522 && (JUMP_P (beyond)
6523 || ((beyond = next_active_insn (beyond))
6524 && JUMP_P (beyond)))
6525 && GET_CODE (PATTERN (beyond)) == SET
6526 && recog_memoized (beyond) == CODE_FOR_jump_compact
6527 && ((INSN_ADDRESSES
6528 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6529 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6530 > 252 + 258 + 2))
6531 gen_block_redirect (beyond,
6532 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6535 next = next_active_insn (insn);
6537 if (next
6538 && (JUMP_P (next)
6539 || ((next = next_active_insn (next))
6540 && JUMP_P (next)))
6541 && GET_CODE (PATTERN (next)) == SET
6542 && recog_memoized (next) == CODE_FOR_jump_compact
6543 && ((INSN_ADDRESSES
6544 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6545 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6546 > 252 + 258 + 2))
6547 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6549 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6551 int addr = INSN_ADDRESSES (INSN_UID (insn));
6552 rtx far_label = 0;
6553 int dest_uid = 0;
6554 struct far_branch *bp;
6556 if (type == TYPE_JUMP)
6558 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
6559 dest_uid = get_dest_uid (far_label, max_uid);
6560 if (! dest_uid)
6562 /* Parse errors can lead to labels outside
6563 the insn stream. */
6564 if (! NEXT_INSN (far_label))
6565 continue;
6567 if (! optimize)
6569 JUMP_LABEL (insn) = far_label;
6570 LABEL_NUSES (far_label)++;
6572 redirect_jump (insn, ret_rtx, 1);
6573 far_label = 0;
6576 bp = uid_branch[dest_uid];
6577 if (! bp)
6579 bp = (struct far_branch *) alloca (sizeof *bp);
6580 uid_branch[dest_uid] = bp;
6581 bp->prev = far_branch_list;
6582 far_branch_list = bp;
6583 bp->near_label = 0;
6584 bp->far_label = far_label;
6585 if (far_label)
6586 LABEL_NUSES (far_label)++;
6588 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6589 if (addr - bp->address <= CONDJUMP_MAX)
6590 emit_label_after (bp->near_label, PREV_INSN (insn));
6591 else
6593 gen_far_branch (bp);
6594 bp->near_label = 0;
6596 else
6597 bp->near_label = 0;
6598 bp->address = addr;
6599 bp->insert_place = insn;
6600 if (! far_label)
6601 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6602 else
6603 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6606 /* Generate all pending far branches,
6607 and free our references to the far labels. */
6608 while (far_branch_list)
6610 if (far_branch_list->near_label
6611 && ! NEXT_INSN (far_branch_list->near_label))
6612 gen_far_branch (far_branch_list);
6613 if (optimize
6614 && far_branch_list->far_label
6615 && ! --LABEL_NUSES (far_branch_list->far_label))
6616 delete_insn (far_branch_list->far_label);
6617 far_branch_list = far_branch_list->prev;
6620 /* Instruction length information is no longer valid due to the new
6621 instructions that have been generated. */
6622 init_insn_lengths ();
6625 /* Dump out instruction addresses, which is useful for debugging the
6626 constant pool table stuff.
6628 If relaxing, output the label and pseudo-ops used to link together
6629 calls and the instruction which set the registers.
6631 ??? The addresses printed by this routine for insns are nonsense for
6632 insns which are inside of a sequence where none of the inner insns have
6633 variable length. This is because the second pass of shorten_branches
6634 does not bother to update them. */
6635 void
6636 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
6637 int noperands ATTRIBUTE_UNUSED)
6639 if (TARGET_DUMPISIZE)
6640 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6642 if (TARGET_RELAX)
6644 rtx note;
6646 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6647 if (note)
6649 rtx pattern;
6651 pattern = PATTERN (insn);
6652 if (GET_CODE (pattern) == PARALLEL)
6653 pattern = XVECEXP (pattern, 0, 0);
6654 switch (GET_CODE (pattern))
6656 case SET:
6657 if (GET_CODE (SET_SRC (pattern)) != CALL
6658 && get_attr_type (insn) != TYPE_SFUNC)
6660 targetm.asm_out.internal_label
6661 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6662 break;
6664 /* else FALLTHROUGH */
6665 case CALL:
6666 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6667 CODE_LABEL_NUMBER (XEXP (note, 0)));
6668 break;
6670 default:
6671 gcc_unreachable ();
6677 /* Dump out any constants accumulated in the final pass. These will
6678 only be labels. */
6679 const char *
6680 output_jump_label_table (void)
6682 int i;
6684 if (pool_size)
6686 fprintf (asm_out_file, "\t.align 2\n");
6687 for (i = 0; i < pool_size; i++)
6689 pool_node *p = &pool_vector[i];
6691 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6692 CODE_LABEL_NUMBER (p->label));
6693 output_asm_insn (".long %O0", &p->value);
6695 pool_size = 0;
6698 return "";
6701 /* A full frame looks like:
6703 arg-5
6704 arg-4
6705 [ if current_function_anonymous_args
6706 arg-3
6707 arg-2
6708 arg-1
6709 arg-0 ]
6710 saved-fp
6711 saved-r10
6712 saved-r11
6713 saved-r12
6714 saved-pr
6715 local-n
6717 local-1
6718 local-0 <- fp points here.
6720 Number of bytes pushed for anonymous args, used to pass information
6721 between expand_prologue and expand_epilogue.
6723 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6724 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6725 for an epilogue and a negative value means that it's for a sibcall
6726 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6727 all the registers that are about to be restored, and hence dead. */
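/* A minimal usage sketch (the byte count is purely illustrative): a prologue
   might call
     output_stack_adjust (-16, stack_pointer_rtx, 0, NULL, true);
   to allocate 16 bytes of frame, while the matching epilogue would pass +16
   with EPILOGUE_P of 1, or -1 when expanding a sibcall epilogue.  */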
6728 static void
6729 output_stack_adjust (int size, rtx reg, int epilogue_p,
6730 HARD_REG_SET *live_regs_mask, bool frame_p)
6732 rtx (*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6733 if (size)
6735 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6737 /* This test is bogus, as output_stack_adjust is used to re-align the
6738 stack. */
6739 #if 0
6740 gcc_assert (!(size % align));
6741 #endif
6743 if (CONST_OK_FOR_ADD (size))
6744 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6745 /* Try to do it with two partial adjustments; however, we must make
6746 sure that the stack is properly aligned at all times, in case
6747 an interrupt occurs between the two partial adjustments. */
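/* For instance (the numbers are only illustrative), suppose SIZE is 240 and
   ALIGN is 4, and 240 is too large for one immediate add (as on non-SHmedia,
   where the add immediate is an 8-bit signed value): then 240 / 2 & -4 == 120,
   so two add #120 adjustments are emitted, and each intermediate stack
   pointer value remains a multiple of 4 even if an interrupt hits between
   the two adds.  */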
6748 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6749 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6751 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6752 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6754 else
6756 rtx const_reg;
6757 rtx insn;
6758 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6759 int i;
6761 /* If TEMP is invalid, we could temporarily save a general
6762 register to MACL. However, there is currently no need
6763 to handle this case, so just die when we see it. */
6764 if (epilogue_p < 0
6765 || current_function_interrupt
6766 || ! call_really_used_regs[temp] || fixed_regs[temp])
6767 temp = -1;
6768 if (temp < 0 && ! current_function_interrupt
6769 && (TARGET_SHMEDIA || epilogue_p >= 0))
6771 HARD_REG_SET temps;
6772 COPY_HARD_REG_SET (temps, call_used_reg_set);
6773 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6774 if (epilogue_p > 0)
6776 int nreg = 0;
6777 if (crtl->return_rtx)
6779 enum machine_mode mode;
6780 mode = GET_MODE (crtl->return_rtx);
6781 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6782 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6784 for (i = 0; i < nreg; i++)
6785 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6786 if (crtl->calls_eh_return)
6788 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6789 for (i = 0; i <= 3; i++)
6790 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6793 if (TARGET_SHMEDIA && epilogue_p < 0)
6794 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6795 CLEAR_HARD_REG_BIT (temps, i);
6796 if (epilogue_p <= 0)
6798 for (i = FIRST_PARM_REG;
6799 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6800 CLEAR_HARD_REG_BIT (temps, i);
6801 if (cfun->static_chain_decl != NULL)
6802 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6804 temp = scavenge_reg (&temps);
6806 if (temp < 0 && live_regs_mask)
6808 HARD_REG_SET temps;
6810 COPY_HARD_REG_SET (temps, *live_regs_mask);
6811 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6812 temp = scavenge_reg (&temps);
6814 if (temp < 0)
6816 rtx adj_reg, tmp_reg, mem;
6818 /* If we reached here, the most likely case is the (sibcall)
6819 epilogue for non-SHmedia. Put a special push/pop sequence
6820 for such cases as a last resort. This looks lengthy, but
6821 it should not be a problem because it seems to be very
6822 rare. */
6824 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6827 /* ??? There is still the slight possibility that r4 or
6828 r5 have been reserved as fixed registers or assigned
6829 as global registers, and they change during an
6830 interrupt. There are possible ways to handle this:
6832 - If we are adjusting the frame pointer (r14), we can do
6833 with a single temp register and an ordinary push / pop
6834 on the stack.
6835 - Grab any call-used or call-saved registers (i.e. not
6836 fixed or globals) for the temps we need. We might
6837 also grab r14 if we are adjusting the stack pointer.
6838 If we can't find enough available registers, issue
6839 a diagnostic and die - the user must have reserved
6840 way too many registers.
6841 But since all this is rather unlikely to happen and
6842 would require extra testing, we just die if r4 / r5
6843 are not available. */
6844 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6845 && !global_regs[4] && !global_regs[5]);
6847 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6848 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6849 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6850 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6851 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6852 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6853 emit_move_insn (mem, tmp_reg);
6854 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6855 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6856 emit_move_insn (mem, tmp_reg);
6857 emit_move_insn (reg, adj_reg);
6858 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6859 emit_move_insn (adj_reg, mem);
6860 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6861 emit_move_insn (tmp_reg, mem);
6862 /* Tell flow the insns that pop r4/r5 aren't dead. */
6863 emit_use (tmp_reg);
6864 emit_use (adj_reg);
6865 return;
6867 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6869 /* If SIZE is negative, subtract the positive value.
6870 This sometimes allows a constant pool entry to be shared
6871 between prologue and epilogue code. */
6872 if (size < 0)
6874 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6875 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6877 else
6879 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6880 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6882 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6883 gen_rtx_SET (VOIDmode, reg,
6884 gen_rtx_PLUS (SImode, reg,
6885 GEN_INT (size))));
6890 /* Emit the specified insn and mark it as frame related.
6891 FIXME: Rename this to emit_frame_insn. */
6892 static rtx
6893 frame_insn (rtx x)
6895 x = emit_insn (x);
6896 RTX_FRAME_RELATED_P (x) = 1;
6897 return x;
6900 /* Output RTL to push register RN onto the stack. */
6901 static rtx
6902 push (int rn)
6904 rtx x;
6905 if (rn == FPUL_REG)
6906 x = gen_push_fpul ();
6907 else if (rn == FPSCR_REG)
6908 x = gen_push_fpscr ();
6909 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6910 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6912 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6913 return NULL_RTX;
6914 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6916 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6917 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6918 else
6919 x = gen_push (gen_rtx_REG (SImode, rn));
6921 x = frame_insn (x);
6922 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6923 return x;
6926 /* Output RTL to pop register RN from the stack. */
6927 static void
6928 pop (int rn)
6930 rtx x, sp_reg, reg;
6931 if (rn == FPUL_REG)
6932 x = gen_pop_fpul ();
6933 else if (rn == FPSCR_REG)
6934 x = gen_pop_fpscr ();
6935 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6936 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6938 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6939 return;
6940 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6942 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6943 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6944 else
6945 x = gen_pop (gen_rtx_REG (SImode, rn));
6947 x = emit_insn (x);
6949 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6950 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6951 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6952 : SET_DEST (PATTERN (x)));
6953 add_reg_note (x, REG_CFA_RESTORE, reg);
6954 add_reg_note (x, REG_CFA_ADJUST_CFA,
6955 gen_rtx_SET (SImode, sp_reg,
6956 plus_constant (SImode, sp_reg,
6957 GET_MODE_SIZE (GET_MODE (reg)))));
6958 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6959 RTX_FRAME_RELATED_P (x) = 1;
6962 /* Generate code to push the regs specified in the mask. */
6963 static void
6964 push_regs (HARD_REG_SET *mask, int interrupt_handler)
6966 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6967 int skip_fpscr = 0;
6969 /* Push PR last; this gives better latencies after the prologue, and
6970 provides candidates for the return delay slot when no general
6971 registers are pushed. */
6972 for (; i < FIRST_PSEUDO_REGISTER; i++)
6974 /* If this is an interrupt handler, and the SZ bit varies,
6975 and we have to push any floating point register, we need
6976 to switch to the correct precision first. */
6977 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6978 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6980 HARD_REG_SET unsaved;
6982 push (FPSCR_REG);
6983 COMPL_HARD_REG_SET (unsaved, *mask);
6984 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6985 skip_fpscr = 1;
6987 if (i != PR_REG
6988 && (i != FPSCR_REG || ! skip_fpscr)
6989 && TEST_HARD_REG_BIT (*mask, i))
6991 /* If the ISR has RESBANK attribute assigned, don't push any of
6992 the following registers - R0-R14, MACH, MACL and GBR. */
6993 if (! (sh_cfun_resbank_handler_p ()
6994 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6995 || i == MACH_REG
6996 || i == MACL_REG
6997 || i == GBR_REG)))
6998 push (i);
7002 /* Push banked registers last to improve delay slot opportunities. */
7003 if (interrupt_handler)
7005 bool use_movml = false;
7007 if (TARGET_SH2A)
7009 unsigned int count = 0;
7011 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7012 if (TEST_HARD_REG_BIT (*mask, i))
7013 count++;
7014 else
7015 break;
7017 /* Use movml when all banked registers are pushed. */
7018 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7019 use_movml = true;
7022 if (sh_cfun_resbank_handler_p ())
7023 ; /* Do nothing. */
7024 else if (use_movml)
7026 rtx x, mem, reg, set;
7027 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7029 /* We must avoid scheduling the multiple-store insn
7030 together with other insns. */
7031 emit_insn (gen_blockage ());
7032 x = gen_movml_push_banked (sp_reg);
7033 x = frame_insn (x);
7034 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7036 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7037 reg = gen_rtx_REG (SImode, i);
7038 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7041 set = gen_rtx_SET (SImode, sp_reg,
7042 plus_constant (Pmode, sp_reg, - 32));
7043 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7044 emit_insn (gen_blockage ());
7046 else
7047 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7048 if (TEST_HARD_REG_BIT (*mask, i))
7049 push (i);
7052 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7053 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7054 push (PR_REG);
7057 /* Calculate how much extra space is needed to save all callee-saved
7058 target registers.
7059 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7060 static int
7061 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7063 int reg;
7064 int stack_space = 0;
7065 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7067 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7068 if ((! call_really_used_regs[reg] || interrupt_handler)
7069 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7070 /* Leave space to save this target register on the stack,
7071 in case target register allocation wants to use it. */
7072 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7073 return stack_space;
7076 /* Decide whether we should reserve space for callee-save target registers,
7077 in case target register allocation wants to use them. REGS_SAVED is
7078 the space, in bytes, that is already required for register saves.
7079 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7080 static int
7081 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7082 HARD_REG_SET *live_regs_mask)
7084 if (optimize_size)
7085 return 0;
7086 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7089 /* Decide how much space to reserve for callee-save target registers
7090 in case target register allocation wants to use them.
7091 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7092 static int
7093 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7095 if (shmedia_space_reserved_for_target_registers)
7096 return shmedia_target_regs_stack_space (live_regs_mask);
7097 else
7098 return 0;
7101 /* Work out the registers which need to be saved, both as a mask and a
7102 count of saved words. Return the count.
7104 If doing a pragma interrupt function, then push all regs used by the
7105 function, and if we call another function (we can tell by looking at PR),
7106 make sure that all the regs it clobbers are safe too. */
7107 static int
7108 calc_live_regs (HARD_REG_SET *live_regs_mask)
7110 unsigned int reg;
7111 int count;
7112 tree attrs;
7113 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7114 bool nosave_low_regs;
7115 int pr_live, has_call;
7117 attrs = DECL_ATTRIBUTES (current_function_decl);
7118 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7119 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7120 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7121 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7123 CLEAR_HARD_REG_SET (*live_regs_mask);
7124 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7125 && df_regs_ever_live_p (FPSCR_REG))
7126 target_flags &= ~MASK_FPU_SINGLE;
7127 /* If switching to double mode avoids a lot of register saves, do that. */
7128 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7129 && TARGET_FPU_SINGLE)
7130 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7131 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7132 && (! call_really_used_regs[reg]
7133 || interrupt_handler)
7134 && ++count > 2)
7136 target_flags &= ~MASK_FPU_SINGLE;
7137 break;
7139 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7140 knows how to use it. That means the pseudo originally allocated for
7141 the initial value can become the PR_MEDIA_REG hard register, as seen for
7142 execute/20010122-1.c:test9. */
7143 if (TARGET_SHMEDIA)
7144 /* ??? this function is called from initial_elimination_offset, hence we
7145 can't use the result of sh_media_register_for_return here. */
7146 pr_live = sh_pr_n_sets ();
7147 else
7149 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7150 pr_live = (pr_initial
7151 ? (!REG_P (pr_initial)
7152 || REGNO (pr_initial) != (PR_REG))
7153 : df_regs_ever_live_p (PR_REG));
7154 /* For SHcompact, if not optimizing, we end up with a memory reference
7155 using the return address pointer for __builtin_return_address even
7156 though there is no actual need to put the PR register on the stack. */
7157 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7159 /* Force PR to be live if the prologue has to call the SHmedia
7160 argument decoder or register saver. */
7161 if (TARGET_SHCOMPACT
7162 && ((crtl->args.info.call_cookie
7163 & ~ CALL_COOKIE_RET_TRAMP (1))
7164 || crtl->saves_all_registers))
7165 pr_live = 1;
7166 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7167 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7169 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7170 ? pr_live
7171 : interrupt_handler
7172 ? (/* Need to save all the regs ever live. */
7173 (df_regs_ever_live_p (reg)
7174 || (call_really_used_regs[reg]
7175 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7176 || reg == PIC_OFFSET_TABLE_REGNUM)
7177 && has_call)
7178 || (TARGET_SHMEDIA && has_call
7179 && REGISTER_NATURAL_MODE (reg) == SImode
7180 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7181 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7182 && reg != RETURN_ADDRESS_POINTER_REGNUM
7183 && reg != T_REG && reg != GBR_REG
7184 /* Push fpscr only on targets which have an FPU. */
7185 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7186 : (/* Only push those regs which are used and need to be saved. */
7187 (TARGET_SHCOMPACT
7188 && flag_pic
7189 && crtl->args.info.call_cookie
7190 && reg == PIC_OFFSET_TABLE_REGNUM)
7191 || (df_regs_ever_live_p (reg)
7192 && ((!call_really_used_regs[reg]
7193 && !(reg != PIC_OFFSET_TABLE_REGNUM
7194 && fixed_regs[reg] && call_used_regs[reg]))
7195 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7196 || (crtl->calls_eh_return
7197 && (reg == EH_RETURN_DATA_REGNO (0)
7198 || reg == EH_RETURN_DATA_REGNO (1)
7199 || reg == EH_RETURN_DATA_REGNO (2)
7200 || reg == EH_RETURN_DATA_REGNO (3)))
7201 || ((reg == MACL_REG || reg == MACH_REG)
7202 && df_regs_ever_live_p (reg)
7203 && sh_cfun_attr_renesas_p ())
7206 SET_HARD_REG_BIT (*live_regs_mask, reg);
7207 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7209 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7210 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7212 if (FP_REGISTER_P (reg))
7214 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7216 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7217 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7220 else if (XD_REGISTER_P (reg))
7222 /* Must switch to double mode to access these registers. */
7223 target_flags &= ~MASK_FPU_SINGLE;
7227 if (nosave_low_regs && reg == R8_REG)
7228 break;
7230 /* If we have a target register optimization pass after prologue / epilogue
7231 threading, we need to assume all target registers will be live even if
7232 they aren't now. */
7233 if (flag_branch_target_load_optimize2
7234 && TARGET_SAVE_ALL_TARGET_REGS
7235 && shmedia_space_reserved_for_target_registers)
7236 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7237 if ((! call_really_used_regs[reg] || interrupt_handler)
7238 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7240 SET_HARD_REG_BIT (*live_regs_mask, reg);
7241 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7243 /* If this is an interrupt handler, we don't have any call-clobbered
7244 registers we can conveniently use for target register save/restore.
7245 Make sure we save at least one general purpose register when we need
7246 to save target registers. */
7247 if (interrupt_handler
7248 && hard_reg_set_intersect_p (*live_regs_mask,
7249 reg_class_contents[TARGET_REGS])
7250 && ! hard_reg_set_intersect_p (*live_regs_mask,
7251 reg_class_contents[GENERAL_REGS]))
7253 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7254 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7257 return count;
7260 /* Code to generate prologue and epilogue sequences */
7262 /* PUSHED is the number of bytes that are being pushed on the
7263 stack for register saves. Return the frame size, padded
7264 appropriately so that the stack stays properly aligned. */
7265 static HOST_WIDE_INT
7266 rounded_frame_size (int pushed)
7268 HOST_WIDE_INT size = get_frame_size ();
7269 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7271 if (ACCUMULATE_OUTGOING_ARGS)
7272 size += crtl->outgoing_args_size;
7274 return ((size + pushed + align - 1) & -align) - pushed;
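/* A worked example (numbers purely illustrative): with a 20-byte frame,
   12 bytes already pushed and a 4-byte ALIGN, this returns
   ((20 + 12 + 3) & -4) - 12 == 32 - 12 == 20, so the pushed bytes plus the
   frame together stay a multiple of the stack alignment.  */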
7277 /* Choose a call-clobbered target-branch register that remains
7278 unchanged along the whole function. We set it up as the return
7279 value in the prologue. */
7281 sh_media_register_for_return (void)
7283 int regno;
7284 int tr0_used;
7286 if (! crtl->is_leaf)
7287 return -1;
7288 if (lookup_attribute ("interrupt_handler",
7289 DECL_ATTRIBUTES (current_function_decl)))
7290 return -1;
7291 if (sh_cfun_interrupt_handler_p ())
7292 return -1;
7294 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7296 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7297 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7298 return regno;
7300 return -1;
7303 /* The maximum registers we need to save are:
7304 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7305 - 32 floating point registers (for each pair, we save none,
7306 one single precision value, or a double precision value).
7307 - 8 target registers
7308 - add 1 entry for a delimiter. */
7309 #define MAX_SAVED_REGS (62+32+8)
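/* That is 62 + 32 + 8 == 102 register slots; the save_schedule entries array
   below allows two more, for the delimiters at the start and the end.  */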
7311 typedef struct save_entry_s
7313 unsigned char reg;
7314 unsigned char mode;
7315 short offset;
7316 } save_entry;
7318 #define MAX_TEMPS 4
7320 /* There will be a delimiter entry with VOIDmode both at the start and the
7321 end of a filled in schedule. The end delimiter has the offset of the
7322 save with the smallest (i.e. most negative) offset. */
7323 typedef struct save_schedule_s
7325 save_entry entries[MAX_SAVED_REGS + 2];
7326 int temps[MAX_TEMPS+1];
7327 } save_schedule;
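/* An illustrative (made-up) filled schedule with two DImode saves and an
   OFFSET_BASE of 0 would look like

     entries[0] = { -1, VOIDmode,   0 }   start delimiter
     entries[1] = { 28, DImode,    -8 }
     entries[2] = { 29, DImode,   -16 }
     entries[3] = { -1, VOIDmode, -16 }   end delimiter, smallest offset

   with temps[] holding a few call-clobbered general registers usable as
   scratch registers, terminated by -1.  */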
7329 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7330 use reverse order. Returns the last entry written to (not counting
7331 the delimiter). OFFSET_BASE is a number to be added to all offset
7332 entries. */
7333 static save_entry *
7334 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7335 int offset_base)
7337 int align, i;
7338 save_entry *entry = schedule->entries;
7339 int tmpx = 0;
7340 int offset;
7342 if (! current_function_interrupt)
7343 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7344 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7345 && ! FUNCTION_ARG_REGNO_P (i)
7346 && i != FIRST_RET_REG
7347 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7348 && ! (crtl->calls_eh_return
7349 && (i == EH_RETURN_STACKADJ_REGNO
7350 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7351 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7352 schedule->temps[tmpx++] = i;
7353 entry->reg = -1;
7354 entry->mode = VOIDmode;
7355 entry->offset = offset_base;
7356 entry++;
7357 /* We loop twice: first, we save 8-byte aligned registers at the
7358 higher addresses, which are known to be aligned. Then, we
7359 proceed to saving 32-bit registers that don't need 8-byte
7360 alignment.
7361 If this is an interrupt function, all registers that need saving
7362 need to be saved in full. Moreover, we need to postpone saving
7363 target registers until we have saved some general purpose registers
7364 we can then use as scratch registers. */
7365 offset = offset_base;
7366 for (align = 1; align >= 0; align--)
7368 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7369 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7371 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7372 int reg = i;
7374 if (current_function_interrupt)
7376 if (TARGET_REGISTER_P (i))
7377 continue;
7378 if (GENERAL_REGISTER_P (i))
7379 mode = DImode;
7381 if (mode == SFmode && (i % 2) == 1
7382 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7383 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7385 mode = DFmode;
7386 i--;
7387 reg--;
7390 /* If we're doing the aligned pass and this is not aligned,
7391 or we're doing the unaligned pass and this is aligned,
7392 skip it. */
7393 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7394 != align)
7395 continue;
7397 if (current_function_interrupt
7398 && GENERAL_REGISTER_P (i)
7399 && tmpx < MAX_TEMPS)
7400 schedule->temps[tmpx++] = i;
7402 offset -= GET_MODE_SIZE (mode);
7403 entry->reg = i;
7404 entry->mode = mode;
7405 entry->offset = offset;
7406 entry++;
7408 if (align && current_function_interrupt)
7409 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7410 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7412 offset -= GET_MODE_SIZE (DImode);
7413 entry->reg = i;
7414 entry->mode = DImode;
7415 entry->offset = offset;
7416 entry++;
7419 entry->reg = -1;
7420 entry->mode = VOIDmode;
7421 entry->offset = offset;
7422 schedule->temps[tmpx] = -1;
7423 return entry - 1;
7426 /* Expand code for the function prologue. */
7427 void
7428 sh_expand_prologue (void)
7430 HARD_REG_SET live_regs_mask;
7431 int d, i;
7432 int d_rounding = 0;
7433 int save_flags = target_flags;
7434 int pretend_args;
7435 int stack_usage;
7436 tree sp_switch_attr
7437 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7439 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7441 /* We have pretend args if we had an object sent partially in registers
7442 and partially on the stack, e.g. a large structure. */
7443 pretend_args = crtl->args.pretend_args_size;
7444 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7445 && (NPARM_REGS(SImode)
7446 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7447 pretend_args = 0;
7449 output_stack_adjust (-pretend_args
7450 - crtl->args.info.stack_regs * 8,
7451 stack_pointer_rtx, 0, NULL, true);
7452 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7454 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7455 /* We're going to use the PIC register to load the address of the
7456 incoming-argument decoder and/or of the return trampoline from
7457 the GOT, so make sure the PIC register is preserved and
7458 initialized. */
7459 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7461 if (TARGET_SHCOMPACT
7462 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7464 int reg;
7466 /* First, make all registers with incoming arguments that will
7467 be pushed onto the stack live, so that register renaming
7468 doesn't overwrite them. */
7469 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7470 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7471 >= NPARM_REGS (SImode) - reg)
7472 for (; reg < NPARM_REGS (SImode); reg++)
7473 emit_insn (gen_shcompact_preserve_incoming_args
7474 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7475 else if (CALL_COOKIE_INT_REG_GET
7476 (crtl->args.info.call_cookie, reg) == 1)
7477 emit_insn (gen_shcompact_preserve_incoming_args
7478 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7480 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7481 stack_pointer_rtx);
7482 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7483 GEN_INT (crtl->args.info.call_cookie));
7484 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7485 gen_rtx_REG (SImode, R0_REG));
7487 else if (TARGET_SHMEDIA)
7489 int tr = sh_media_register_for_return ();
7491 if (tr >= 0)
7492 emit_move_insn (gen_rtx_REG (DImode, tr),
7493 gen_rtx_REG (DImode, PR_MEDIA_REG));
7496 /* Emit the code for SETUP_VARARGS. */
7497 if (cfun->stdarg)
7499 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7501 /* Push arg regs as if they'd been provided by caller in stack. */
7502 for (i = 0; i < NPARM_REGS(SImode); i++)
7504 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7506 if (i >= (NPARM_REGS(SImode)
7507 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7509 break;
7510 push (rn);
7511 stack_usage += GET_MODE_SIZE (SImode);
7516 /* If we're supposed to switch stacks at function entry, do so now. */
7517 if (sp_switch_attr)
7519 rtx lab, newsrc;
7520 /* The argument specifies a variable holding the address of the
7521 stack the interrupt function should switch to/from at entry/exit. */
7522 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7523 const char *s
7524 = ggc_strdup (TREE_STRING_POINTER (arg));
7525 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7527 lab = add_constant (sp_switch, SImode, 0);
7528 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7530 emit_insn (gen_sp_switch_1 (newsrc));
7533 d = calc_live_regs (&live_regs_mask);
7534 /* ??? Maybe we could save some switching if we can move a mode switch
7535 that already happens to be at the function start into the prologue. */
7536 if (target_flags != save_flags && ! current_function_interrupt)
7537 emit_insn (gen_toggle_sz ());
7539 if (TARGET_SH5)
7541 int offset_base, offset;
7542 rtx r0 = NULL_RTX;
7543 int offset_in_r0 = -1;
7544 int sp_in_r0 = 0;
7545 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7546 int total_size, save_size;
7547 save_schedule schedule;
7548 save_entry *entry;
7549 int *tmp_pnt;
7551 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7552 && ! current_function_interrupt)
7553 r0 = gen_rtx_REG (Pmode, R0_REG);
7555 /* D is the actual number of bytes that we need for saving registers,
7556 however, in initial_elimination_offset we have committed to using
7557 an additional TREGS_SPACE amount of bytes - in order to keep both
7558 addresses to arguments supplied by the caller and local variables
7559 valid, we must keep this gap. Place it between the incoming
7560 arguments and the actually saved registers in a bid to optimize
7561 locality of reference. */
7562 total_size = d + tregs_space;
7563 total_size += rounded_frame_size (total_size);
7564 save_size = total_size - rounded_frame_size (d);
7565 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7566 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7567 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7569 /* If adjusting the stack in a single step costs nothing extra, do so.
7570 I.e. either if a single addi is enough, or we need a movi anyway,
7571 and we don't exceed the maximum offset range (the test for the
7572 latter is conservative for simplicity). */
7573 if (TARGET_SHMEDIA
7574 && (CONST_OK_FOR_I10 (-total_size)
7575 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7576 && total_size <= 2044)))
7577 d_rounding = total_size - save_size;
7579 offset_base = d + d_rounding;
7581 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7582 0, NULL, true);
7583 stack_usage += save_size + d_rounding;
7585 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7586 tmp_pnt = schedule.temps;
7587 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7589 enum machine_mode mode = (enum machine_mode) entry->mode;
7590 unsigned int reg = entry->reg;
7591 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7592 rtx orig_reg_rtx;
7594 offset = entry->offset;
7596 reg_rtx = gen_rtx_REG (mode, reg);
7598 mem_rtx = gen_frame_mem (mode,
7599 gen_rtx_PLUS (Pmode,
7600 stack_pointer_rtx,
7601 GEN_INT (offset)));
7603 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7605 gcc_assert (r0);
7606 mem_rtx = NULL_RTX;
7609 if (HAVE_PRE_DECREMENT
7610 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7611 || mem_rtx == NULL_RTX
7612 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7614 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7616 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7617 pre_dec = NULL_RTX;
7618 else
7620 mem_rtx = NULL_RTX;
7621 offset += GET_MODE_SIZE (mode);
7625 if (mem_rtx != NULL_RTX)
7626 goto addr_ok;
7628 if (offset_in_r0 == -1)
7630 emit_move_insn (r0, GEN_INT (offset));
7631 offset_in_r0 = offset;
7633 else if (offset != offset_in_r0)
7635 emit_move_insn (r0,
7636 gen_rtx_PLUS
7637 (Pmode, r0,
7638 GEN_INT (offset - offset_in_r0)));
7639 offset_in_r0 += offset - offset_in_r0;
7642 if (pre_dec != NULL_RTX)
7644 if (! sp_in_r0)
7646 emit_move_insn (r0,
7647 gen_rtx_PLUS
7648 (Pmode, r0, stack_pointer_rtx));
7649 sp_in_r0 = 1;
7652 offset -= GET_MODE_SIZE (mode);
7653 offset_in_r0 -= GET_MODE_SIZE (mode);
7655 mem_rtx = pre_dec;
7657 else if (sp_in_r0)
7658 mem_rtx = gen_frame_mem (mode, r0);
7659 else
7660 mem_rtx = gen_frame_mem (mode,
7661 gen_rtx_PLUS (Pmode,
7662 stack_pointer_rtx,
7663 r0));
7665 /* We must not use an r0-based address for target-branch
7666 registers or for special registers without pre-dec
7667 memory addresses, since we store their values in r0
7668 first. */
7669 gcc_assert (!TARGET_REGISTER_P (reg)
7670 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7671 || mem_rtx == pre_dec));
7673 addr_ok:
7674 orig_reg_rtx = reg_rtx;
7675 if (TARGET_REGISTER_P (reg)
7676 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7677 && mem_rtx != pre_dec))
7679 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7681 emit_move_insn (tmp_reg, reg_rtx);
7683 if (REGNO (tmp_reg) == R0_REG)
7685 offset_in_r0 = -1;
7686 sp_in_r0 = 0;
7687 gcc_assert (!refers_to_regno_p
7688 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7691 if (*++tmp_pnt <= 0)
7692 tmp_pnt = schedule.temps;
7694 reg_rtx = tmp_reg;
7697 rtx insn;
7699 /* Mark as interesting for the DWARF CFI generator.  */
7700 insn = emit_move_insn (mem_rtx, reg_rtx);
7701 RTX_FRAME_RELATED_P (insn) = 1;
7702 /* If we use an intermediate register for the save, we can't
7703 describe this exactly in cfi as a copy of the to-be-saved
7704 register into the temporary register and then a store of the
7705 temporary register to the stack, because the temporary register can
7706 have a different natural size than the to-be-saved register.
7707 Thus, we gloss over the intermediate copy and pretend we do
7708 a direct save from the to-be-saved register. */
7709 if (REGNO (reg_rtx) != reg)
7711 rtx set;
7713 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7714 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7717 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7719 rtx reg_rtx = gen_rtx_REG (mode, reg);
7720 rtx set;
7721 rtx mem_rtx = gen_frame_mem (mode,
7722 gen_rtx_PLUS (Pmode,
7723 stack_pointer_rtx,
7724 GEN_INT (offset)));
7726 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7727 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7732 gcc_assert (entry->offset == d_rounding);
7734 else
7736 push_regs (&live_regs_mask, current_function_interrupt);
7737 stack_usage += d;
7740 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7741 emit_insn (gen_GOTaddr2picreg ());
7743 if (SHMEDIA_REGS_STACK_ADJUST ())
7745 /* This must NOT go through the PLT, otherwise mach and macl
7746 may be clobbered. */
7747 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7748 (TARGET_FPU_ANY
7749 ? "__GCC_push_shmedia_regs"
7750 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7751 emit_insn (gen_shmedia_save_restore_regs_compact
7752 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7755 if (target_flags != save_flags && ! current_function_interrupt)
7756 emit_insn (gen_toggle_sz ());
7758 target_flags = save_flags;
7760 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7761 stack_pointer_rtx, 0, NULL, true);
7762 stack_usage += rounded_frame_size (d) - d_rounding;
7764 if (frame_pointer_needed)
7765 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7767 if (TARGET_SHCOMPACT
7768 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7770 /* This must NOT go through the PLT, otherwise mach and macl
7771 may be clobbered. */
7772 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7773 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7774 emit_insn (gen_shcompact_incoming_args ());
7777 /* If we are profiling, make sure no instructions are scheduled before
7778 the call to mcount. Similarly if some call instructions are swapped
7779 before frame related insns, it'll confuse the unwinder because
7780 currently SH has no unwind info for function epilogues. */
7781 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7782 emit_insn (gen_blockage ());
7784 if (flag_stack_usage_info)
7785 current_function_static_stack_size = stack_usage;
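/* Illustrative user-level sketch (not part of this file): the sp_switch
   attribute handled in the prologue above names a variable holding the
   address of an alternate stack; the emitted prologue loads it through a
   constant-pool entry before any registers are saved.  The names
   "alt_stack_area", "alt_stack" and "handler" are hypothetical.  */

char alt_stack_area[1024];
void *alt_stack = alt_stack_area + sizeof (alt_stack_area);

__attribute__ ((interrupt_handler, sp_switch ("alt_stack")))
void handler (void)
{
  /* Body runs with SP pointing into alt_stack_area; the normal stack
     pointer is restored by the matching sp_switch_2 insn emitted in the
     epilogue.  */
}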
7788 /* Expand code for the function epilogue. */
7789 void
7790 sh_expand_epilogue (bool sibcall_p)
7792 HARD_REG_SET live_regs_mask;
7793 int d, i;
7794 int d_rounding = 0;
7796 int save_flags = target_flags;
7797 int frame_size, save_size;
7798 int fpscr_deferred = 0;
7799 int e = sibcall_p ? -1 : 1;
7801 d = calc_live_regs (&live_regs_mask);
7803 save_size = d;
7804 frame_size = rounded_frame_size (d);
7806 if (TARGET_SH5)
7808 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7809 int total_size;
7810 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7811 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7812 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7814 total_size = d + tregs_space;
7815 total_size += rounded_frame_size (total_size);
7816 save_size = total_size - frame_size;
7818 /* If adjusting the stack in a single step costs nothing extra, do so.
7819 I.e. either if a single addi is enough, or we need a movi anyway,
7820 and we don't exceed the maximum offset range (the test for the
7821 latter is conservative for simplicity). */
7822 if (TARGET_SHMEDIA
7823 && ! frame_pointer_needed
7824 && (CONST_OK_FOR_I10 (total_size)
7825 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7826 && total_size <= 2044)))
7827 d_rounding = frame_size;
7829 frame_size -= d_rounding;
7832 if (frame_pointer_needed)
7834 /* We must avoid scheduling the epilogue with previous basic blocks.
7835 See PR/18032 and PR/40313. */
7836 emit_insn (gen_blockage ());
7837 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7838 &live_regs_mask, true);
7840 /* We must avoid moving the stack pointer adjustment past code
7841 which reads from the local frame, else an interrupt could
7842 occur after the SP adjustment and clobber data in the local
7843 frame. */
7844 emit_insn (gen_blockage ());
7845 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7847 else if (frame_size)
7849 /* We must avoid moving the stack pointer adjustment past code
7850 which reads from the local frame, else an interrupt could
7851 occur after the SP adjustment and clobber data in the local
7852 frame. */
7853 emit_insn (gen_blockage ());
7854 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7855 &live_regs_mask, true);
7858 if (SHMEDIA_REGS_STACK_ADJUST ())
7860 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7861 (TARGET_FPU_ANY
7862 ? "__GCC_pop_shmedia_regs"
7863 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7864 /* This must NOT go through the PLT, otherwise mach and macl
7865 may be clobbered. */
7866 emit_insn (gen_shmedia_save_restore_regs_compact
7867 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7870 /* Pop all the registers. */
7872 if (target_flags != save_flags && ! current_function_interrupt)
7873 emit_insn (gen_toggle_sz ());
7874 if (TARGET_SH5)
7876 int offset_base, offset;
7877 int offset_in_r0 = -1;
7878 int sp_in_r0 = 0;
7879 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7880 save_schedule schedule;
7881 save_entry *entry;
7882 int *tmp_pnt;
7884 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7885 offset_base = -entry[1].offset + d_rounding;
7886 tmp_pnt = schedule.temps;
7887 for (; entry->mode != VOIDmode; entry--)
7889 enum machine_mode mode = (enum machine_mode) entry->mode;
7890 int reg = entry->reg;
7891 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7893 offset = offset_base + entry->offset;
7894 reg_rtx = gen_rtx_REG (mode, reg);
7896 mem_rtx = gen_frame_mem (mode,
7897 gen_rtx_PLUS (Pmode,
7898 stack_pointer_rtx,
7899 GEN_INT (offset)));
7901 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7902 mem_rtx = NULL_RTX;
7904 if (HAVE_POST_INCREMENT
7905 && (offset == offset_in_r0
7906 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7907 && mem_rtx == NULL_RTX)
7908 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7910 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7912 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7913 post_inc = NULL_RTX;
7914 else
7915 mem_rtx = NULL_RTX;
7918 if (mem_rtx != NULL_RTX)
7919 goto addr_ok;
7921 if (offset_in_r0 == -1)
7923 emit_move_insn (r0, GEN_INT (offset));
7924 offset_in_r0 = offset;
7926 else if (offset != offset_in_r0)
7928 emit_move_insn (r0,
7929 gen_rtx_PLUS
7930 (Pmode, r0,
7931 GEN_INT (offset - offset_in_r0)));
7932 offset_in_r0 += offset - offset_in_r0;
7935 if (post_inc != NULL_RTX)
7937 if (! sp_in_r0)
7939 emit_move_insn (r0,
7940 gen_rtx_PLUS
7941 (Pmode, r0, stack_pointer_rtx));
7942 sp_in_r0 = 1;
7945 mem_rtx = post_inc;
7947 offset_in_r0 += GET_MODE_SIZE (mode);
7949 else if (sp_in_r0)
7950 mem_rtx = gen_frame_mem (mode, r0);
7951 else
7952 mem_rtx = gen_frame_mem (mode,
7953 gen_rtx_PLUS (Pmode,
7954 stack_pointer_rtx,
7955 r0));
7957 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7958 || mem_rtx == post_inc);
7960 addr_ok:
7961 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7962 && mem_rtx != post_inc)
7964 emit_move_insn (r0, mem_rtx);
7965 mem_rtx = r0;
7967 else if (TARGET_REGISTER_P (reg))
7969 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
7971 /* Give the scheduler a bit of freedom by using up to
7972 MAX_TEMPS registers in a round-robin fashion. */
7973 emit_move_insn (tmp_reg, mem_rtx);
7974 mem_rtx = tmp_reg;
7975 if (*++tmp_pnt < 0)
7976 tmp_pnt = schedule.temps;
7979 emit_move_insn (reg_rtx, mem_rtx);
7982 gcc_assert (entry->offset + offset_base == d + d_rounding);
7984 else /* ! TARGET_SH5 */
7986 int last_reg;
7988 save_size = 0;
7989 /* For an ISR with RESBANK attribute assigned, don't pop the PR
7990 register.  */
7991 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7992 && !sh_cfun_resbank_handler_p ())
7994 if (!frame_pointer_needed)
7995 emit_insn (gen_blockage ());
7996 pop (PR_REG);
7999 /* Banked registers are popped first to avoid being scheduled in the
8000 delay slot. RTE switches banks before the ds instruction. */
8001 if (current_function_interrupt)
8003 bool use_movml = false;
8005 if (TARGET_SH2A)
8007 unsigned int count = 0;
8009 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8010 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8011 count++;
8012 else
8013 break;
8015 /* Use movml when all banked registers are popped.  */
8016 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8017 use_movml = true;
8020 if (sh_cfun_resbank_handler_p ())
8021 ; /* Do nothing. */
8022 else if (use_movml)
8024 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8026 /* We must avoid scheduling the multiple-load insn together with
8027 other insns.  */
8028 emit_insn (gen_blockage ());
8029 emit_insn (gen_movml_pop_banked (sp_reg));
8030 emit_insn (gen_blockage ());
8032 else
8033 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8034 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8035 pop (i);
8037 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8039 else
8040 last_reg = FIRST_PSEUDO_REGISTER;
8042 for (i = 0; i < last_reg; i++)
8044 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8046 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8047 && hard_reg_set_intersect_p (live_regs_mask,
8048 reg_class_contents[DF_REGS]))
8049 fpscr_deferred = 1;
8050 /* For an ISR with RESBANK attribute assigned, don't pop the
8051 following registers: R0-R14, MACH, MACL and GBR.  */
8052 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8053 && ! (sh_cfun_resbank_handler_p ()
8054 && ((j >= FIRST_GENERAL_REG
8055 && j < LAST_GENERAL_REG)
8056 || j == MACH_REG
8057 || j == MACL_REG
8058 || j == GBR_REG)))
8059 pop (j);
8061 if (j == FIRST_FP_REG && fpscr_deferred)
8062 pop (FPSCR_REG);
8065 if (target_flags != save_flags && ! current_function_interrupt)
8066 emit_insn (gen_toggle_sz ());
8067 target_flags = save_flags;
8069 output_stack_adjust (crtl->args.pretend_args_size
8070 + save_size + d_rounding
8071 + crtl->args.info.stack_regs * 8,
8072 stack_pointer_rtx, e, NULL, true);
8074 if (crtl->calls_eh_return)
8075 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8076 EH_RETURN_STACKADJ_RTX));
8078 /* Switch back to the normal stack if necessary. */
8079 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8080 emit_insn (gen_sp_switch_2 ());
8082 /* Tell flow the insn that pops PR isn't dead. */
8083 /* PR_REG will never be live in SHmedia mode, and we don't need to
8084 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8085 by the return pattern. */
8086 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8087 emit_use (gen_rtx_REG (SImode, PR_REG));
8090 /* Emit code to change the current function's return address to RA.
8091 TEMP is available as a scratch register, if needed. */
8092 void
8093 sh_set_return_address (rtx ra, rtx tmp)
8095 HARD_REG_SET live_regs_mask;
8096 int d;
8097 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8098 int pr_offset;
8100 d = calc_live_regs (&live_regs_mask);
8102 /* If pr_reg isn't live, we can set it (or the register given in
8103 sh_media_register_for_return) directly. */
8104 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8106 rtx rr;
8108 if (TARGET_SHMEDIA)
8110 int rr_regno = sh_media_register_for_return ();
8112 if (rr_regno < 0)
8113 rr_regno = pr_reg;
8115 rr = gen_rtx_REG (DImode, rr_regno);
8117 else
8118 rr = gen_rtx_REG (SImode, pr_reg);
8120 emit_insn (GEN_MOV (rr, ra));
8121 /* Tell flow the register for return isn't dead. */
8122 emit_use (rr);
8123 return;
8126 if (TARGET_SH5)
8128 int offset;
8129 save_schedule schedule;
8130 save_entry *entry;
8132 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8133 offset = entry[1].offset;
8134 for (; entry->mode != VOIDmode; entry--)
8135 if (entry->reg == pr_reg)
8136 goto found;
8138 /* We couldn't find the PR register.  */
8139 gcc_unreachable ();
8141 found:
8142 offset = entry->offset - offset;
8143 pr_offset = (rounded_frame_size (d) + offset
8144 + SHMEDIA_REGS_STACK_ADJUST ());
8146 else
8147 pr_offset = rounded_frame_size (d);
8149 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8151 if (frame_pointer_needed)
8152 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8153 else
8154 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8156 tmp = gen_frame_mem (Pmode, tmp);
8157 emit_insn (GEN_MOV (tmp, ra));
8158 /* Tell flow this store isn't dead.  */
8159 emit_use (tmp);
8162 /* Clear variables at function end. */
8163 static void
8164 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8165 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8169 static rtx
8170 sh_builtin_saveregs (void)
8172 /* First unnamed integer register. */
8173 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8174 /* Number of integer registers we need to save. */
8175 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8176 /* First unnamed SFmode float reg */
8177 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8178 /* Number of SFmode float regs to save. */
8179 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8180 rtx regbuf, fpregs;
8181 int bufsize, regno;
8182 alias_set_type alias_set;
8184 if (TARGET_SH5)
8186 if (n_intregs)
8188 int pushregs = n_intregs;
8190 while (pushregs < NPARM_REGS (SImode) - 1
8191 && (CALL_COOKIE_INT_REG_GET
8192 (crtl->args.info.call_cookie,
8193 NPARM_REGS (SImode) - pushregs)
8194 == 1))
8196 crtl->args.info.call_cookie
8197 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8198 - pushregs, 1);
8199 pushregs++;
8202 if (pushregs == NPARM_REGS (SImode))
8203 crtl->args.info.call_cookie
8204 |= (CALL_COOKIE_INT_REG (0, 1)
8205 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8206 else
8207 crtl->args.info.call_cookie
8208 |= CALL_COOKIE_STACKSEQ (pushregs);
8210 crtl->args.pretend_args_size += 8 * n_intregs;
8212 if (TARGET_SHCOMPACT)
8213 return const0_rtx;
8216 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8218 error ("__builtin_saveregs not supported by this subtarget");
8219 return const0_rtx;
8222 if (TARGET_SHMEDIA)
8223 n_floatregs = 0;
8225 /* Allocate block of memory for the regs. */
8226 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8227 Or can assign_stack_local accept a 0 SIZE argument? */
8228 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8230 if (TARGET_SHMEDIA)
8231 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8232 else if (n_floatregs & 1)
8234 rtx addr;
8236 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8237 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8238 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8239 regbuf = change_address (regbuf, BLKmode, addr);
8241 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8243 rtx addr, mask;
8245 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8246 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8247 XEXP (regbuf, 0), 4));
8248 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8249 emit_insn (gen_andsi3 (addr, addr, mask));
8250 regbuf = change_address (regbuf, BLKmode, addr);
8252 else
8253 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8254 alias_set = get_varargs_alias_set ();
8255 set_mem_alias_set (regbuf, alias_set);
8257 /* Save int args.
8258 This is optimized to only save the regs that are necessary. Explicitly
8259 named args need not be saved. */
8260 if (n_intregs > 0)
8261 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8262 adjust_address (regbuf, BLKmode,
8263 n_floatregs * UNITS_PER_WORD),
8264 n_intregs);
8266 if (TARGET_SHMEDIA)
8267 /* Return the address of the regbuf. */
8268 return XEXP (regbuf, 0);
8270 /* Save float args.
8271 This is optimized to only save the regs that are necessary. Explicitly
8272 named args need not be saved.
8273 We explicitly build a pointer to the buffer because it halves the insn
8274 count when not optimizing (otherwise the pointer is built for each reg
8275 saved).
8276 We emit the moves in reverse order so that we can use predecrement. */
8278 fpregs = copy_to_mode_reg (Pmode,
8279 plus_constant (Pmode, XEXP (regbuf, 0),
8280 n_floatregs * UNITS_PER_WORD));
8281 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8283 rtx mem;
8284 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8286 emit_insn (gen_addsi3 (fpregs, fpregs,
8287 GEN_INT (-2 * UNITS_PER_WORD)));
8288 mem = change_address (regbuf, DFmode, fpregs);
8289 emit_move_insn (mem,
8290 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8292 regno = first_floatreg;
8293 if (regno & 1)
8295 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8296 mem = change_address (regbuf, SFmode, fpregs);
8297 emit_move_insn (mem,
8298 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8299 + regno - SH_REG_MSW_OFFSET));
8302 else
8303 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8305 rtx mem;
8307 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8308 mem = change_address (regbuf, SFmode, fpregs);
8309 emit_move_insn (mem,
8310 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8313 /* Return the address of the regbuf. */
8314 return XEXP (regbuf, 0);
8317 /* Define the `__builtin_va_list' type for the ABI. */
8318 static tree
8319 sh_build_builtin_va_list (void)
8321 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8322 tree record, type_decl;
8324 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8325 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8326 return ptr_type_node;
8328 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8329 type_decl = build_decl (BUILTINS_LOCATION,
8330 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8332 f_next_o = build_decl (BUILTINS_LOCATION,
8333 FIELD_DECL, get_identifier ("__va_next_o"),
8334 ptr_type_node);
8335 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8336 FIELD_DECL,
8337 get_identifier ("__va_next_o_limit"),
8338 ptr_type_node);
8339 f_next_fp = build_decl (BUILTINS_LOCATION,
8340 FIELD_DECL, get_identifier ("__va_next_fp"),
8341 ptr_type_node);
8342 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8343 FIELD_DECL,
8344 get_identifier ("__va_next_fp_limit"),
8345 ptr_type_node);
8346 f_next_stack = build_decl (BUILTINS_LOCATION,
8347 FIELD_DECL, get_identifier ("__va_next_stack"),
8348 ptr_type_node);
8350 DECL_FIELD_CONTEXT (f_next_o) = record;
8351 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8352 DECL_FIELD_CONTEXT (f_next_fp) = record;
8353 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8354 DECL_FIELD_CONTEXT (f_next_stack) = record;
8356 TYPE_STUB_DECL (record) = type_decl;
8357 TYPE_NAME (record) = type_decl;
8358 TYPE_FIELDS (record) = f_next_o;
8359 DECL_CHAIN (f_next_o) = f_next_o_limit;
8360 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8361 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8362 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8364 layout_type (record);
8366 return record;
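/* Illustrative sketch only (not a declaration used by the compiler): on
   SH2E/SH4 with the GNU calling convention, the record laid out above is
   roughly equivalent to the following C structure.  Field names match the
   FIELD_DECLs created above; the comments reflect how sh_va_start below
   initializes them.  */

struct sh_va_list_sketch
{
  void *__va_next_o;        /* next unnamed integer arg in the reg-save area */
  void *__va_next_o_limit;  /* end of the saved integer argument registers   */
  void *__va_next_fp;       /* next unnamed FP arg in the reg-save area      */
  void *__va_next_fp_limit; /* end of the saved FP argument registers        */
  void *__va_next_stack;    /* first argument passed on the stack            */
};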
8369 /* Implement `va_start' for varargs and stdarg. */
8370 static void
8371 sh_va_start (tree valist, rtx nextarg)
8373 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8374 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8375 tree t, u;
8376 int nfp, nint;
8378 if (TARGET_SH5)
8380 expand_builtin_saveregs ();
8381 std_expand_builtin_va_start (valist, nextarg);
8382 return;
8385 if ((! TARGET_SH2E && ! TARGET_SH4)
8386 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8388 std_expand_builtin_va_start (valist, nextarg);
8389 return;
8392 f_next_o = TYPE_FIELDS (va_list_type_node);
8393 f_next_o_limit = DECL_CHAIN (f_next_o);
8394 f_next_fp = DECL_CHAIN (f_next_o_limit);
8395 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8396 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8398 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8399 NULL_TREE);
8400 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8401 valist, f_next_o_limit, NULL_TREE);
8402 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8403 NULL_TREE);
8404 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8405 valist, f_next_fp_limit, NULL_TREE);
8406 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8407 valist, f_next_stack, NULL_TREE);
8409 /* Call __builtin_saveregs. */
8410 u = make_tree (sizetype, expand_builtin_saveregs ());
8411 u = fold_convert (ptr_type_node, u);
8412 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8413 TREE_SIDE_EFFECTS (t) = 1;
8414 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8416 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8417 if (nfp < 8)
8418 nfp = 8 - nfp;
8419 else
8420 nfp = 0;
8421 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8422 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8423 TREE_SIDE_EFFECTS (t) = 1;
8424 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8426 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8427 TREE_SIDE_EFFECTS (t) = 1;
8428 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8430 nint = crtl->args.info.arg_count[SH_ARG_INT];
8431 if (nint < 4)
8432 nint = 4 - nint;
8433 else
8434 nint = 0;
8435 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8436 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8437 TREE_SIDE_EFFECTS (t) = 1;
8438 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8440 u = make_tree (ptr_type_node, nextarg);
8441 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8442 TREE_SIDE_EFFECTS (t) = 1;
8443 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
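/* Worked example (hypothetical user code), assuming an SH4-style target
   with UNITS_PER_WORD == 4, NPARM_REGS (SImode) == 4 and
   NPARM_REGS (SFmode) == 8:

     int sum_ints (int n, ...)
     {
       __builtin_va_list ap;
       __builtin_va_start (ap, n);
       ...
     }

   One integer argument is named and no FP argument is named, so
   nfp = 8 - 0 = 8 and nint = 4 - 1 = 3.  __va_next_fp then points at the
   start of the register save buffer returned by __builtin_saveregs,
   __va_next_fp_limit and __va_next_o lie 32 bytes further on,
   __va_next_o_limit another 12 bytes beyond that, and __va_next_stack
   points at the first stack-passed argument.  */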
8446 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8447 member, return it. */
8448 static tree
8449 find_sole_member (tree type)
8451 tree field, member = NULL_TREE;
8453 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8455 if (TREE_CODE (field) != FIELD_DECL)
8456 continue;
8457 if (!DECL_SIZE (field))
8458 return NULL_TREE;
8459 if (integer_zerop (DECL_SIZE (field)))
8460 continue;
8461 if (member)
8462 return NULL_TREE;
8463 member = field;
8465 return member;
8468 /* Implement `va_arg'. */
8469 static tree
8470 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8471 gimple_seq *post_p ATTRIBUTE_UNUSED)
8473 HOST_WIDE_INT size, rsize;
8474 tree tmp, pptr_type_node;
8475 tree addr, lab_over = NULL, result = NULL;
8476 bool pass_by_ref;
8477 tree eff_type;
8479 if (!VOID_TYPE_P (type))
8480 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8481 else
8482 pass_by_ref = false;
8484 if (pass_by_ref)
8485 type = build_pointer_type (type);
8487 size = int_size_in_bytes (type);
8488 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8489 pptr_type_node = build_pointer_type (ptr_type_node);
8491 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8492 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8494 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8495 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8496 int pass_as_float;
8497 tree lab_false;
8498 tree member;
8500 f_next_o = TYPE_FIELDS (va_list_type_node);
8501 f_next_o_limit = DECL_CHAIN (f_next_o);
8502 f_next_fp = DECL_CHAIN (f_next_o_limit);
8503 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8504 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8506 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8507 NULL_TREE);
8508 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8509 valist, f_next_o_limit, NULL_TREE);
8510 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8511 valist, f_next_fp, NULL_TREE);
8512 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8513 valist, f_next_fp_limit, NULL_TREE);
8514 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8515 valist, f_next_stack, NULL_TREE);
8517 /* Structures with a single member with a distinct mode are passed
8518 like their member. This is relevant if the latter has a REAL_TYPE
8519 or COMPLEX_TYPE type. */
8520 eff_type = type;
8521 while (TREE_CODE (eff_type) == RECORD_TYPE
8522 && (member = find_sole_member (eff_type))
8523 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8524 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8525 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8527 tree field_type = TREE_TYPE (member);
8529 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8530 eff_type = field_type;
8531 else
8533 gcc_assert ((TYPE_ALIGN (eff_type)
8534 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8535 || (TYPE_ALIGN (eff_type)
8536 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8537 break;
8541 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8543 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8544 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8545 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8546 && size <= 16));
8548 else
8550 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8553 addr = create_tmp_var (pptr_type_node, NULL);
8554 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8555 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8557 valist = build_simple_mem_ref (addr);
8559 if (pass_as_float)
8561 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8562 tree cmp;
8563 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8565 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8566 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8568 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8569 tmp = next_fp_limit;
8570 if (size > 4 && !is_double)
8571 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8572 tmp = build2 (GE_EXPR, boolean_type_node,
8573 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8574 cmp = build3 (COND_EXPR, void_type_node, tmp,
8575 build1 (GOTO_EXPR, void_type_node,
8576 unshare_expr (lab_false)), NULL_TREE);
8577 if (!is_double)
8578 gimplify_and_add (cmp, pre_p);
8580 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8581 || (is_double || size == 16))
8583 tmp = fold_convert (sizetype, next_fp_tmp);
8584 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8585 size_int (UNITS_PER_WORD));
8586 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8587 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8589 if (is_double)
8590 gimplify_and_add (cmp, pre_p);
8592 #ifdef FUNCTION_ARG_SCmode_WART
8593 if (TYPE_MODE (eff_type) == SCmode
8594 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8596 tree subtype = TREE_TYPE (eff_type);
8597 tree real, imag;
8599 imag
8600 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8601 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8603 real
8604 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8605 real = get_initialized_tmp_var (real, pre_p, NULL);
8607 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8608 if (type != eff_type)
8609 result = build1 (VIEW_CONVERT_EXPR, type, result);
8610 result = get_initialized_tmp_var (result, pre_p, NULL);
8612 #endif /* FUNCTION_ARG_SCmode_WART */
8614 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8615 gimplify_and_add (tmp, pre_p);
8617 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8618 gimplify_and_add (tmp, pre_p);
8620 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8621 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8622 gimplify_assign (unshare_expr (next_fp_tmp),
8623 unshare_expr (valist), pre_p);
8625 gimplify_assign (unshare_expr (valist),
8626 unshare_expr (next_fp_tmp), post_p);
8627 valist = next_fp_tmp;
8629 else
8631 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8632 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8633 unshare_expr (next_o_limit));
8634 tmp = build3 (COND_EXPR, void_type_node, tmp,
8635 build1 (GOTO_EXPR, void_type_node,
8636 unshare_expr (lab_false)),
8637 NULL_TREE);
8638 gimplify_and_add (tmp, pre_p);
8640 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8641 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8643 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8644 gimplify_and_add (tmp, pre_p);
8646 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8647 gimplify_and_add (tmp, pre_p);
8649 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8650 gimplify_assign (unshare_expr (next_o),
8651 unshare_expr (next_o_limit), pre_p);
8653 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8654 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8657 if (!result)
8659 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8660 gimplify_and_add (tmp, pre_p);
8664 /* ??? In va-sh.h, there had been code to make values larger than
8665 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8667 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8668 if (result)
8670 gimplify_assign (result, tmp, pre_p);
8671 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8672 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8673 gimplify_and_add (tmp, pre_p);
8675 else
8676 result = tmp;
8678 if (pass_by_ref)
8679 result = build_va_arg_indirect_ref (result);
8681 return result;
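/* Example of the sole-member rule used above (hypothetical types): with
   TARGET_SH4, a wrapper such as

     struct wrapped_double { double d; };

   has the same mode as plain 'double', so va_arg fetches it through the
   FP part of the save area exactly as it would a double, whereas

     struct two_ints { int a, b; };

   has more than one member, is never unwrapped, and therefore takes the
   integer/stack path.  */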
8684 /* 64-bit floating point memory transfers are paired single precision loads
8685 or stores.  So DWARF information needs fixing in little endian mode (unless
8686 PR=SZ=1 in FPSCR).  */
8687 static rtx
8688 sh_dwarf_register_span (rtx reg)
8690 unsigned regno = REGNO (reg);
8692 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8693 return NULL_RTX;
8695 return
8696 gen_rtx_PARALLEL (VOIDmode,
8697 gen_rtvec (2,
8698 gen_rtx_REG (SFmode, regno + 1),
8699 gen_rtx_REG (SFmode, regno)));
8702 static enum machine_mode
8703 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8704 int *punsignedp, const_tree funtype,
8705 int for_return)
8707 if (sh_promote_prototypes (funtype))
8708 return promote_mode (type, mode, punsignedp);
8709 else
8710 return default_promote_function_mode (type, mode, punsignedp, funtype,
8711 for_return);
8714 static bool
8715 sh_promote_prototypes (const_tree type)
8717 if (TARGET_HITACHI)
8718 return false;
8719 if (! type)
8720 return true;
8721 return ! sh_attr_renesas_p (type);
8724 /* Whether an argument must be passed by reference. On SHcompact, we
8725 pretend arguments wider than 32 bits that would have been passed in
8726 registers are passed by reference, so that an SHmedia trampoline
8727 loads them into the full 64-bit registers.  */
8728 static int
8729 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8730 const_tree type, bool named)
8732 unsigned HOST_WIDE_INT size;
8734 if (type)
8735 size = int_size_in_bytes (type);
8736 else
8737 size = GET_MODE_SIZE (mode);
8739 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8740 && (!named
8741 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8742 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8743 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8744 && size > 4
8745 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8746 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8747 return size;
8748 else
8749 return 0;
8752 static bool
8753 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8754 const_tree type, bool named)
8756 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8758 if (targetm.calls.must_pass_in_stack (mode, type))
8759 return true;
8761 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8762 wants to know about pass-by-reference semantics for incoming
8763 arguments. */
8764 if (! cum)
8765 return false;
8767 if (TARGET_SHCOMPACT)
8769 cum->byref = shcompact_byref (cum, mode, type, named);
8770 return cum->byref != 0;
8773 return false;
8776 static bool
8777 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8778 const_tree type, bool named ATTRIBUTE_UNUSED)
8780 /* ??? How can it possibly be correct to return true only on the
8781 caller side of the equation? Is there someplace else in the
8782 sh backend that's magically producing the copies? */
8783 return (get_cumulative_args (cum)->outgoing
8784 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8785 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8788 static int
8789 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8790 tree type, bool named ATTRIBUTE_UNUSED)
8792 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8793 int words = 0;
8795 if (!TARGET_SH5
8796 && PASS_IN_REG_P (*cum, mode, type)
8797 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8798 && (ROUND_REG (*cum, mode)
8799 + (mode != BLKmode
8800 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
8801 : ROUND_ADVANCE (int_size_in_bytes (type)))
8802 > NPARM_REGS (mode)))
8803 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
8805 else if (!TARGET_SHCOMPACT
8806 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8807 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8809 return words * UNITS_PER_WORD;
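/* Worked example for the first branch above (a target where
   !(TARGET_SH4 || TARGET_SH2A_DOUBLE) holds): if three SImode argument
   registers are already in use (ROUND_REG == 3) and the next argument is
   an 8-byte DImode value, then 3 + 2 > NPARM_REGS (SImode) == 4, so
   words = 4 - 3 = 1 and 4 bytes are passed in the last register while
   the remainder goes on the stack.  */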
8813 /* Define where to put the arguments to a function.
8814 Value is zero to push the argument on the stack,
8815 or a hard register in which to store the argument.
8817 MODE is the argument's machine mode.
8818 TYPE is the data type of the argument (as a tree).
8819 This is null for libcalls where that information may
8820 not be available.
8821 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8822 the preceding args and about the function being called.
8823 NAMED is nonzero if this argument is a named parameter
8824 (otherwise it is an extra parameter matching an ellipsis).
8826 On SH the first args are normally in registers
8827 and the rest are pushed. Any arg that starts within the first
8828 NPARM_REGS words is at least partially passed in a register unless
8829 its data type forbids. */
8830 static rtx
8831 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8832 const_tree type, bool named)
8834 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8836 if (! TARGET_SH5 && mode == VOIDmode)
8837 return GEN_INT (ca->renesas_abi ? 1 : 0);
8839 if (! TARGET_SH5
8840 && PASS_IN_REG_P (*ca, mode, type)
8841 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8843 int regno;
8845 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8846 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
8848 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8849 gen_rtx_REG (SFmode,
8850 BASE_ARG_REG (mode)
8851 + (ROUND_REG (*ca, mode) ^ 1)),
8852 const0_rtx);
8853 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8854 gen_rtx_REG (SFmode,
8855 BASE_ARG_REG (mode)
8856 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
8857 GEN_INT (4));
8858 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8861 /* If the alignment of a DF value causes an SF register to be
8862 skipped, we will use that skipped register for the next SF
8863 value. */
8864 if ((TARGET_HITACHI || ca->renesas_abi)
8865 && ca->free_single_fp_reg
8866 && mode == SFmode)
8867 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8869 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
8870 ^ (mode == SFmode && TARGET_SH4
8871 && TARGET_LITTLE_ENDIAN
8872 && ! TARGET_HITACHI && ! ca->renesas_abi);
8873 return gen_rtx_REG (mode, regno);
8877 if (TARGET_SH5)
8879 if (mode == VOIDmode && TARGET_SHCOMPACT)
8880 return GEN_INT (ca->call_cookie);
8882 /* The following test assumes unnamed arguments are promoted to
8883 DFmode. */
8884 if (mode == SFmode && ca->free_single_fp_reg)
8885 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
8887 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
8888 && (named || ! ca->prototype_p)
8889 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
8891 if (! ca->prototype_p && TARGET_SHMEDIA)
8892 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
8894 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
8895 FIRST_FP_PARM_REG
8896 + ca->arg_count[(int) SH_ARG_FLOAT]);
8899 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
8900 && (! TARGET_SHCOMPACT
8901 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
8902 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
8903 type, named))))
8905 return gen_rtx_REG (mode, (FIRST_PARM_REG
8906 + ca->arg_count[(int) SH_ARG_INT]));
8909 return NULL_RTX;
8912 return NULL_RTX;
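/* Register assignment sketch under the usual (non-Renesas) SH4 ABI, where
   FIRST_PARM_REG is r4, NPARM_REGS (SImode) is 4 and FP arguments start
   at fr4 with NPARM_REGS (SFmode) == 8 (hypothetical prototype):

     void g (int a, int b, float x, double y, int z);

   'a' and 'b' are returned as r4 and r5 by the code above, 'x' as the
   first single-precision FP argument register, 'y' as an aligned
   double-precision register pair, and 'z' as r6; once the four integer
   or eight SFmode argument registers are exhausted, NULL_RTX is returned
   and the argument goes on the stack.  */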
8915 /* Update the data in CUM to advance over an argument
8916 of mode MODE and data type TYPE.
8917 (TYPE is null for libcalls where that information may not be
8918 available.) */
8919 static void
8920 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
8921 const_tree type, bool named)
8923 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8925 if (ca->force_mem)
8926 ca->force_mem = 0;
8927 else if (TARGET_SH5)
8929 const_tree type2 = (ca->byref && type
8930 ? TREE_TYPE (type)
8931 : type);
8932 enum machine_mode mode2 = (ca->byref && type
8933 ? TYPE_MODE (type2)
8934 : mode);
8935 int dwords = ((ca->byref
8936 ? ca->byref
8937 : mode2 == BLKmode
8938 ? int_size_in_bytes (type2)
8939 : GET_MODE_SIZE (mode2)) + 7) / 8;
8940 int numregs = MIN (dwords, NPARM_REGS (SImode)
8941 - ca->arg_count[(int) SH_ARG_INT]);
8943 if (numregs)
8945 ca->arg_count[(int) SH_ARG_INT] += numregs;
8946 if (TARGET_SHCOMPACT
8947 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
8949 ca->call_cookie
8950 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8951 - numregs, 1);
8952 /* N.B. We want this also for outgoing. */
8953 ca->stack_regs += numregs;
8955 else if (ca->byref)
8957 if (! ca->outgoing)
8958 ca->stack_regs += numregs;
8959 ca->byref_regs += numregs;
8960 ca->byref = 0;
8962 ca->call_cookie
8963 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8964 - numregs, 2);
8965 while (--numregs);
8966 ca->call_cookie
8967 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
8968 - 1, 1);
8970 else if (dwords > numregs)
8972 int pushregs = numregs;
8974 if (TARGET_SHCOMPACT)
8975 ca->stack_regs += numregs;
8976 while (pushregs < NPARM_REGS (SImode) - 1
8977 && (CALL_COOKIE_INT_REG_GET
8978 (ca->call_cookie,
8979 NPARM_REGS (SImode) - pushregs)
8980 == 1))
8982 ca->call_cookie
8983 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8984 - pushregs, 1);
8985 pushregs++;
8987 if (numregs == NPARM_REGS (SImode))
8988 ca->call_cookie
8989 |= CALL_COOKIE_INT_REG (0, 1)
8990 | CALL_COOKIE_STACKSEQ (numregs - 1);
8991 else
8992 ca->call_cookie
8993 |= CALL_COOKIE_STACKSEQ (numregs);
8996 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
8997 && (named || ! ca->prototype_p))
8999 if (mode2 == SFmode && ca->free_single_fp_reg)
9000 ca->free_single_fp_reg = 0;
9001 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9002 < NPARM_REGS (SFmode))
9004 int numfpregs
9005 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9006 NPARM_REGS (SFmode)
9007 - ca->arg_count[(int) SH_ARG_FLOAT]);
9009 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9011 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9013 if (ca->outgoing && numregs > 0)
9016 ca->call_cookie
9017 |= (CALL_COOKIE_INT_REG
9018 (ca->arg_count[(int) SH_ARG_INT]
9019 - numregs + ((numfpregs - 2) / 2),
9020 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9021 - numfpregs) / 2));
9023 while (numfpregs -= 2);
9025 else if (mode2 == SFmode && (named)
9026 && (ca->arg_count[(int) SH_ARG_FLOAT]
9027 < NPARM_REGS (SFmode)))
9028 ca->free_single_fp_reg
9029 = FIRST_FP_PARM_REG - numfpregs
9030 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9033 return;
9036 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9038 /* Note that we've used the skipped register. */
9039 if (mode == SFmode && ca->free_single_fp_reg)
9041 ca->free_single_fp_reg = 0;
9042 return;
9044 /* When we have a DF after an SF, there's an SF register that gets
9045 skipped in order to align the DF value. We note this skipped
9046 register, because the next SF value will use it, and not the
9047 SF that follows the DF. */
9048 if (mode == DFmode
9049 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
9051 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
9052 + BASE_ARG_REG (mode));
9056 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9057 || PASS_IN_REG_P (*ca, mode, type))
9058 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9059 = (ROUND_REG (*ca, mode)
9060 + (mode == BLKmode
9061 ? ROUND_ADVANCE (int_size_in_bytes (type))
9062 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
9065 /* The Renesas calling convention doesn't quite fit into this scheme since
9066 the address is passed like an invisible argument, but one that is always
9067 passed in memory. */
9068 static rtx
9069 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9071 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9072 return NULL_RTX;
9073 return gen_rtx_REG (Pmode, 2);
9076 /* Worker function for TARGET_FUNCTION_VALUE.
9078 For the SH, this is like LIBCALL_VALUE, except that we must change the
9079 mode like PROMOTE_MODE does.
9080 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9081 tested here has to be kept in sync with the one in
9082 explow.c:promote_mode. */
9083 static rtx
9084 sh_function_value (const_tree valtype,
9085 const_tree fn_decl_or_type,
9086 bool outgoing ATTRIBUTE_UNUSED)
9088 if (fn_decl_or_type
9089 && !DECL_P (fn_decl_or_type))
9090 fn_decl_or_type = NULL;
9092 return gen_rtx_REG (
9093 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9094 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9095 && (TREE_CODE (valtype) == INTEGER_TYPE
9096 || TREE_CODE (valtype) == ENUMERAL_TYPE
9097 || TREE_CODE (valtype) == BOOLEAN_TYPE
9098 || TREE_CODE (valtype) == REAL_TYPE
9099 || TREE_CODE (valtype) == OFFSET_TYPE))
9100 && sh_promote_prototypes (fn_decl_or_type)
9101 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9102 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
9105 /* Worker function for TARGET_LIBCALL_VALUE. */
9106 static rtx
9107 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9109 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9112 /* Return true if N is a possible register number of function value. */
9113 static bool
9114 sh_function_value_regno_p (const unsigned int regno)
9116 return ((regno) == FIRST_RET_REG
9117 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9118 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9121 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9122 static bool
9123 sh_return_in_memory (const_tree type, const_tree fndecl)
9125 if (TARGET_SH5)
9127 if (TYPE_MODE (type) == BLKmode)
9128 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9129 else
9130 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9132 else
9134 return (TYPE_MODE (type) == BLKmode
9135 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9136 && TREE_CODE (type) == RECORD_TYPE));
9140 /* We actually emit the code in sh_expand_prologue. We used to use
9141 a static variable to flag that we need to emit this code, but that
9142 doesn't work when inlining, when functions are deferred and then emitted
9143 later. Fortunately, we already have two flags that are part of struct
9144 function that tell if a function uses varargs or stdarg. */
9145 static void
9146 sh_setup_incoming_varargs (cumulative_args_t ca,
9147 enum machine_mode mode,
9148 tree type,
9149 int *pretend_arg_size,
9150 int second_time ATTRIBUTE_UNUSED)
9152 gcc_assert (cfun->stdarg);
9153 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9155 int named_parm_regs, anon_parm_regs;
9157 named_parm_regs = (ROUND_REG (*get_cumulative_args (ca), mode)
9158 + (mode == BLKmode
9159 ? ROUND_ADVANCE (int_size_in_bytes (type))
9160 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
9161 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9162 if (anon_parm_regs > 0)
9163 *pretend_arg_size = anon_parm_regs * 4;
9167 static bool
9168 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9170 return TARGET_SH5;
9173 static bool
9174 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9176 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9178 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9182 /* Define the offset between two registers, one to be eliminated, and
9183 the other its replacement, at the start of a routine.  */
9184 int
9185 initial_elimination_offset (int from, int to)
9187 int regs_saved;
9188 int regs_saved_rounding = 0;
9189 int total_saved_regs_space;
9190 int total_auto_space;
9191 int save_flags = target_flags;
9192 int copy_flags;
9193 HARD_REG_SET live_regs_mask;
9195 shmedia_space_reserved_for_target_registers = false;
9196 regs_saved = calc_live_regs (&live_regs_mask);
9197 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9199 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9201 shmedia_space_reserved_for_target_registers = true;
9202 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9205 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9206 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9207 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9209 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9210 copy_flags = target_flags;
9211 target_flags = save_flags;
9213 total_saved_regs_space = regs_saved + regs_saved_rounding;
9215 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9216 return total_saved_regs_space + total_auto_space
9217 + crtl->args.info.byref_regs * 8;
9219 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9220 return total_saved_regs_space + total_auto_space
9221 + crtl->args.info.byref_regs * 8;
9223 /* Initial gap between fp and sp is 0. */
9224 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9225 return 0;
9227 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9228 return rounded_frame_size (0);
9230 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9231 return rounded_frame_size (0);
9233 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9234 && (to == HARD_FRAME_POINTER_REGNUM
9235 || to == STACK_POINTER_REGNUM));
9236 if (TARGET_SH5)
9238 int n = total_saved_regs_space;
9239 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9240 save_schedule schedule;
9241 save_entry *entry;
9243 n += total_auto_space;
9245 /* If it wasn't saved, there's not much we can do. */
9246 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9247 return n;
9249 target_flags = copy_flags;
9251 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9252 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9253 if (entry->reg == pr_reg)
9255 target_flags = save_flags;
9256 return entry->offset;
9258 gcc_unreachable ();
9260 else
9261 return total_auto_space;
9264 /* Parse the -mfixed-range= option string. */
9265 void
9266 sh_fix_range (const char *const_str)
9268 int i, first, last;
9269 char *str, *dash, *comma;
9271 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9272 REG2 are either register names or register numbers. The effect
9273 of this option is to mark the registers in the range from REG1 to
9274 REG2 as ``fixed'' so they won't be used by the compiler. */
9276 i = strlen (const_str);
9277 str = (char *) alloca (i + 1);
9278 memcpy (str, const_str, i + 1);
9280 while (1)
9282 dash = strchr (str, '-');
9283 if (!dash)
9285 warning (0, "value of -mfixed-range must have form REG1-REG2");
9286 return;
9288 *dash = '\0';
9289 comma = strchr (dash + 1, ',');
9290 if (comma)
9291 *comma = '\0';
9293 first = decode_reg_name (str);
9294 if (first < 0)
9296 warning (0, "unknown register name: %s", str);
9297 return;
9300 last = decode_reg_name (dash + 1);
9301 if (last < 0)
9303 warning (0, "unknown register name: %s", dash + 1);
9304 return;
9307 *dash = '-';
9309 if (first > last)
9311 warning (0, "%s-%s is an empty range", str, dash + 1);
9312 return;
9315 for (i = first; i <= last; ++i)
9316 fixed_regs[i] = call_used_regs[i] = 1;
9318 if (!comma)
9319 break;
9321 *comma = ',';
9322 str = comma + 1;
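/* Usage sketch: a single -mfixed-range option may list several ranges,
   e.g.

     -mfixed-range=r4-r7,r10-r10

   marks r4..r7 and r10 as fixed (and call-used, per the loop above) so
   the register allocator never uses them.  The register names shown are
   illustrative; any name accepted by decode_reg_name works, and a single
   register is still written as a one-element range.  */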
9326 /* Insert any deferred function attributes from earlier pragmas. */
9327 static void
9328 sh_insert_attributes (tree node, tree *attributes)
9330 tree attrs;
9332 if (TREE_CODE (node) != FUNCTION_DECL)
9333 return;
9335 /* We are only interested in fields. */
9336 if (!DECL_P (node))
9337 return;
9339 /* Append the attributes to the deferred attributes. */
9340 *sh_deferred_function_attributes_tail = *attributes;
9341 attrs = sh_deferred_function_attributes;
9342 if (!attrs)
9343 return;
9345 /* Some attributes imply or require the interrupt attribute. */
9346 if (!lookup_attribute ("interrupt_handler", attrs)
9347 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9349 /* If we have a trapa_handler, but no interrupt_handler attribute,
9350 insert an interrupt_handler attribute. */
9351 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9352 /* We can't use sh_pr_interrupt here because that's not in the
9353 java frontend. */
9354 attrs
9355 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9356 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9357 if the interrupt attribute is missing, we ignore the attribute
9358 and warn. */
9359 else if (lookup_attribute ("sp_switch", attrs)
9360 || lookup_attribute ("trap_exit", attrs)
9361 || lookup_attribute ("nosave_low_regs", attrs)
9362 || lookup_attribute ("resbank", attrs))
9364 tree *tail;
9366 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9368 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9369 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9370 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9371 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9372 warning (OPT_Wattributes,
9373 "%qE attribute only applies to interrupt functions",
9374 TREE_PURPOSE (attrs));
9375 else
9377 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9378 NULL_TREE);
9379 tail = &TREE_CHAIN (*tail);
9382 attrs = *attributes;
9386 /* Install the processed list. */
9387 *attributes = attrs;
9389 /* Clear deferred attributes. */
9390 sh_deferred_function_attributes = NULL_TREE;
9391 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9393 return;
9396 /*------------------------------------------------------------------------------
9397 Target specific attributes
9398 Supported attributes are:
9400 * interrupt_handler
9401 Specifies this function is an interrupt handler.
9403 * trapa_handler
9404 Like interrupt_handler, but don't save all registers.
9406 * sp_switch
9407 Specifies an alternate stack for an interrupt handler to run on.
9409 * trap_exit
9410 Use a trapa to exit an interrupt function instead of rte.
9412 * nosave_low_regs
9413 Don't save r0..r7 in an interrupt handler function.
9414 This is useful on SH3* and SH4*, which have a separate set of low
9415 regs for user and privileged modes.
9416 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9417 those that run with interrupts disabled and thus can't be
9418 interrupted themselves).
9420 * renesas
9421 Use Renesas calling/layout conventions (functions and structures).
9423 * resbank
9424 In case of an interrupt handler function, use a register bank to
9425 save registers R0-R14, MACH, MACL, GBR and PR.
9426 This is available only on SH2A targets.
9428 * function_vector
9429 Declares a function to be called using the TBR relative addressing
9430 mode. Takes an argument that specifies the slot number in the table
9431 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
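/* Illustrative sketch, not part of this file: in user code these attributes
   would typically be written as, e.g.,

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));
     void tbr_fn (void) __attribute__ ((function_vector (42)));

   where isr, isr2, tbr_fn, "alt_stack", 11 and 42 are made-up example
   names and values.  */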
9434 /* Handle a 'resbank' attribute. */
9435 static tree
9436 sh_handle_resbank_handler_attribute (tree * node, tree name,
9437 tree args ATTRIBUTE_UNUSED,
9438 int flags ATTRIBUTE_UNUSED,
9439 bool * no_add_attrs)
9441 if (!TARGET_SH2A)
9443 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9444 name);
9445 *no_add_attrs = true;
9447 if (TREE_CODE (*node) != FUNCTION_DECL)
9449 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9450 name);
9451 *no_add_attrs = true;
9454 return NULL_TREE;
9457 /* Handle an "interrupt_handler" attribute; arguments as in
9458 struct attribute_spec.handler. */
9459 static tree
9460 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9461 tree args ATTRIBUTE_UNUSED,
9462 int flags ATTRIBUTE_UNUSED,
9463 bool *no_add_attrs)
9465 if (TREE_CODE (*node) != FUNCTION_DECL)
9467 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9468 name);
9469 *no_add_attrs = true;
9471 else if (TARGET_SHCOMPACT)
9473 error ("attribute interrupt_handler is not compatible with -m5-compact");
9474 *no_add_attrs = true;
9477 return NULL_TREE;
9480 /* Handle a 'function_vector' attribute; arguments as in
9481 struct attribute_spec.handler. */
9482 static tree
9483 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9484 tree args ATTRIBUTE_UNUSED,
9485 int flags ATTRIBUTE_UNUSED,
9486 bool * no_add_attrs)
9488 if (!TARGET_SH2A)
9490 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9491 name);
9492 *no_add_attrs = true;
9494 else if (TREE_CODE (*node) != FUNCTION_DECL)
9496 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9497 name);
9498 *no_add_attrs = true;
9500 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9502 /* The argument must be a constant integer. */
9503 warning (OPT_Wattributes,
9504 "%qE attribute argument not an integer constant",
9505 name);
9506 *no_add_attrs = true;
9508 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9510 /* The argument value must be between 0 and 255. */
9511 warning (OPT_Wattributes,
9512 "%qE attribute argument should be between 0 to 255",
9513 name);
9514 *no_add_attrs = true;
9516 return NULL_TREE;
9519 /* Returns true if current function has been assigned the attribute
9520 'function_vector'. */
9521 bool
9522 sh2a_is_function_vector_call (rtx x)
9524 if (GET_CODE (x) == SYMBOL_REF
9525 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9527 tree tr = SYMBOL_REF_DECL (x);
9529 if (sh2a_function_vector_p (tr))
9530 return true;
9533 return false;
9536 /* Returns the function vector number, if the attribute
9537 'function_vector' is assigned, otherwise returns zero. */
9539 sh2a_get_function_vector_number (rtx x)
9541 int num;
9542 tree list, t;
9544 if ((GET_CODE (x) == SYMBOL_REF)
9545 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9547 t = SYMBOL_REF_DECL (x);
9549 if (TREE_CODE (t) != FUNCTION_DECL)
9550 return 0;
9552 list = SH_ATTRIBUTES (t);
9553 while (list)
9555 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9557 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9558 return num;
9561 list = TREE_CHAIN (list);
9564 return 0;
9566 else
9567 return 0;
9570 /* Handle an "sp_switch" attribute; arguments as in
9571 struct attribute_spec.handler. */
9572 static tree
9573 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9574 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9576 if (TREE_CODE (*node) != FUNCTION_DECL)
9578 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9579 name);
9580 *no_add_attrs = true;
9582 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9584 /* The argument must be a constant string. */
9585 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9586 name);
9587 *no_add_attrs = true;
9590 return NULL_TREE;
9593 /* Handle an "trap_exit" attribute; arguments as in
9594 struct attribute_spec.handler. */
9595 static tree
9596 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9597 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9599 if (TREE_CODE (*node) != FUNCTION_DECL)
9601 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9602 name);
9603 *no_add_attrs = true;
9605 /* The argument specifies a trap number to be used in a trapa instruction
9606 at function exit (instead of an rte instruction). */
9607 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9609 /* The argument must be a constant integer. */
9610 warning (OPT_Wattributes, "%qE attribute argument not an "
9611 "integer constant", name);
9612 *no_add_attrs = true;
9615 return NULL_TREE;
9618 static tree
9619 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9620 tree name ATTRIBUTE_UNUSED,
9621 tree args ATTRIBUTE_UNUSED,
9622 int flags ATTRIBUTE_UNUSED,
9623 bool *no_add_attrs ATTRIBUTE_UNUSED)
9625 return NULL_TREE;
9628 /* True if __attribute__((renesas)) or -mrenesas. */
9629 bool
9630 sh_attr_renesas_p (const_tree td)
9632 if (TARGET_HITACHI)
9633 return true;
9634 if (td == NULL_TREE)
9635 return false;
9636 if (DECL_P (td))
9637 td = TREE_TYPE (td);
9638 if (td == error_mark_node)
9639 return false;
9640 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9641 != NULL_TREE);
9644 /* True if __attribute__((renesas)) or -mrenesas, for the current
9645 function. */
9646 bool
9647 sh_cfun_attr_renesas_p (void)
9649 return sh_attr_renesas_p (current_function_decl);
9652 /* Returns true if the current function has the "interrupt_handler"
9653 attribute set. */
9654 bool
9655 sh_cfun_interrupt_handler_p (void)
9657 return (lookup_attribute ("interrupt_handler",
9658 DECL_ATTRIBUTES (current_function_decl))
9659 != NULL_TREE);
9662 /* Returns true if FUNC has been assigned the attribute
9663 "function_vector". */
9664 bool
9665 sh2a_function_vector_p (tree func)
9667 tree list;
9668 if (TREE_CODE (func) != FUNCTION_DECL)
9669 return false;
9671 list = SH_ATTRIBUTES (func);
9672 while (list)
9674 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9675 return true;
9677 list = TREE_CHAIN (list);
9679 return false;
9682 /* Returns true if given tree has the "resbank" attribute set. */
9683 bool
9684 sh_cfun_resbank_handler_p (void)
9686 return ((lookup_attribute ("resbank",
9687 DECL_ATTRIBUTES (current_function_decl))
9688 != NULL_TREE)
9689 && (lookup_attribute ("interrupt_handler",
9690 DECL_ATTRIBUTES (current_function_decl))
9691 != NULL_TREE) && TARGET_SH2A);
9694 /* Returns true if the current function has a "trap_exit" attribute set. */
9695 bool
9696 sh_cfun_trap_exit_p (void)
9698 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9699 != NULL_TREE;
9702 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9703 static const char *
9704 sh_check_pch_target_flags (int old_flags)
9706 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9707 | MASK_SH_E | MASK_HARD_SH4
9708 | MASK_FPU_SINGLE | MASK_SH4))
9709 return _("created and used with different architectures / ABIs");
9710 if ((old_flags ^ target_flags) & MASK_HITACHI)
9711 return _("created and used with different ABIs");
9712 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9713 return _("created and used with different endianness");
9714 return NULL;
9717 /* Predicates used by the templates. */
9719 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9720 Used only in general_movsrc_operand. */
9721 bool
9722 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9724 switch (REGNO (op))
9726 case PR_REG:
9727 case MACL_REG:
9728 case MACH_REG:
9729 return true;
9731 return false;
9734 /* Returns true if OP is a floating point value with value 0.0. */
9735 bool
9736 fp_zero_operand (rtx op)
9738 REAL_VALUE_TYPE r;
9740 if (GET_MODE (op) != SFmode)
9741 return false;
9743 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9744 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9747 /* Returns true if OP is a floating point value with value 1.0. */
9748 bool
9749 fp_one_operand (rtx op)
9751 REAL_VALUE_TYPE r;
9753 if (GET_MODE (op) != SFmode)
9754 return false;
9756 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9757 return REAL_VALUES_EQUAL (r, dconst1);
9760 /* In general mode switching is used. If we are
9761 compiling without -mfmovd, movsf_ie isn't taken into account for
9762 mode switching. We could check in machine_dependent_reorg for
9763 cases where we know we are in single precision mode, but there is
9764 no interface to find that out during reload, so we must avoid
9765 choosing an fldi alternative during reload and thus failing to
9766 allocate a scratch register for the constant loading. */
9767 bool
9768 fldi_ok (void)
9770 return true;
9773 /* Return the TLS type for TLS symbols. */
9774 enum tls_model
9775 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9777 if (GET_CODE (op) != SYMBOL_REF)
9778 return TLS_MODEL_NONE;
9779 return SYMBOL_REF_TLS_MODEL (op);
9782 /* Return the destination address of a branch. */
9783 static int
9784 branch_dest (rtx branch)
9786 rtx dest = SET_SRC (PATTERN (branch));
9787 int dest_uid;
9789 if (GET_CODE (dest) == IF_THEN_ELSE)
9790 dest = XEXP (dest, 1);
9791 dest = XEXP (dest, 0);
9792 dest_uid = INSN_UID (dest);
9793 return INSN_ADDRESSES (dest_uid);
9796 /* Return nonzero if REG is not used after INSN.
9797 We assume REG is a reload reg, and therefore does
9798 not live past labels. It may live past calls or jumps though. */
9799 bool
9800 reg_unused_after (rtx reg, rtx insn)
9802 enum rtx_code code;
9803 rtx set;
9805 /* If the reg is set by this instruction, then it is safe for our
9806 case. Disregard the case where this is a store to memory, since
9807 we are checking a register used in the store address. */
9808 set = single_set (insn);
9809 if (set && !MEM_P (SET_DEST (set))
9810 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9811 return true;
9813 while ((insn = NEXT_INSN (insn)))
9815 rtx set;
9816 if (!INSN_P (insn))
9817 continue;
9819 code = GET_CODE (insn);
9821 #if 0
9822 /* If this is a label that existed before reload, then the register
9823 is dead here. However, if this is a label added by reorg, then
9824 the register may still be live here. We can't tell the difference,
9825 so we just ignore labels completely. */
9826 if (code == CODE_LABEL)
9827 return 1;
9828 /* else */
9829 #endif
9831 if (code == JUMP_INSN)
9832 return false;
9834 /* If this is a sequence, we must handle them all at once.
9835 We could have for instance a call that sets the target register,
9836 and an insn in a delay slot that uses the register. In this case,
9837 we must return 0. */
9838 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9840 int i;
9841 int retval = 0;
9843 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
9845 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
9846 rtx set = single_set (this_insn);
9848 if (CALL_P (this_insn))
9849 code = CALL_INSN;
9850 else if (JUMP_P (this_insn))
9852 if (INSN_ANNULLED_BRANCH_P (this_insn))
9853 return false;
9854 code = JUMP_INSN;
9857 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9858 return false;
9859 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9861 if (!MEM_P (SET_DEST (set)))
9862 retval = true;
9863 else
9864 return false;
9866 if (set == NULL_RTX
9867 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9868 return false;
9870 if (retval == 1)
9871 return true;
9872 else if (code == JUMP_INSN)
9873 return false;
9876 set = single_set (insn);
9877 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9878 return false;
9879 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9880 return !MEM_P (SET_DEST (set));
9881 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
9882 return false;
9884 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
9885 return true;
9887 return true;
9890 #include "ggc.h"
9892 static GTY(()) rtx t_reg_rtx;
9894 get_t_reg_rtx (void)
9896 if (! t_reg_rtx)
9897 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
9898 return t_reg_rtx;
9901 static GTY(()) rtx fpscr_rtx;
9903 get_fpscr_rtx (void)
9905 if (! fpscr_rtx)
9907 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
9908 REG_USERVAR_P (fpscr_rtx) = 1;
9909 mark_user_reg (fpscr_rtx);
9911 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
9912 mark_user_reg (fpscr_rtx);
9913 return fpscr_rtx;
9916 static GTY(()) tree fpscr_values;
9918 static void
9919 emit_fpu_switch (rtx scratch, int index)
9921 rtx dst, src;
9923 if (fpscr_values == NULL)
9925 tree t;
9927 t = build_index_type (integer_one_node);
9928 t = build_array_type (integer_type_node, t);
9929 t = build_decl (BUILTINS_LOCATION,
9930 VAR_DECL, get_identifier ("__fpscr_values"), t);
9931 DECL_ARTIFICIAL (t) = 1;
9932 DECL_IGNORED_P (t) = 1;
9933 DECL_EXTERNAL (t) = 1;
9934 TREE_STATIC (t) = 1;
9935 TREE_PUBLIC (t) = 1;
9936 TREE_USED (t) = 1;
9938 fpscr_values = t;
9941 src = DECL_RTL (fpscr_values);
9942 if (!can_create_pseudo_p ())
9944 emit_move_insn (scratch, XEXP (src, 0));
9945 if (index != 0)
9946 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
9947 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
9949 else
9950 src = adjust_address (src, PSImode, index * 4);
9952 dst = get_fpscr_rtx ();
9953 emit_move_insn (dst, src);
9956 void
9957 emit_sf_insn (rtx pat)
9959 emit_insn (pat);
9962 void
9963 emit_df_insn (rtx pat)
9965 emit_insn (pat);
9968 void
9969 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9971 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9974 void
9975 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9977 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
9978 get_fpscr_rtx ()));
9981 void
9982 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
9984 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
9987 void
9988 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
9990 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
9991 get_fpscr_rtx ()));
9994 static rtx get_free_reg (HARD_REG_SET);
9996 /* This function returns a register to use to load the address to load
9997 the fpscr from. Currently it always returns r1 or r7, but when we are
9998 able to use pseudo registers after combine, or have a better mechanism
9999 for choosing a register, it should be done here. */
10000 /* REGS_LIVE is the liveness information for the point for which we
10001 need this allocation. In some bare-bones exit blocks, r1 is live at the
10002 start. We can even have all of r0..r3 being live:
10003 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10004 The INSN before which the new insns are placed will clobber the register
10005 we return. If a basic block consists only of setting the return value
10006 register to a pseudo and using that register, the return value is not
10007 live before or after this block, yet we'll insert our insns right in
10008 the middle. */
10009 static rtx
10010 get_free_reg (HARD_REG_SET regs_live)
10012 if (! TEST_HARD_REG_BIT (regs_live, 1))
10013 return gen_rtx_REG (Pmode, 1);
10015 /* Hard reg 1 is live; since this is a small register classes target,
10016 there shouldn't be anything but a jump before the function end. */
10017 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10018 return gen_rtx_REG (Pmode, 7);
10021 /* This function will set the fpscr from memory.
10022 MODE is the mode we are setting it to. */
10023 void
10024 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10026 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10027 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10028 rtx addr_reg;
10030 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10031 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10034 /* Is the given character a logical line separator for the assembler? */
10035 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10036 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10037 #endif
10039 static bool
10040 sequence_insn_p (rtx insn)
10042 rtx prev, next;
10044 prev = PREV_INSN (insn);
10045 if (prev == NULL)
10046 return false;
10048 next = NEXT_INSN (prev);
10049 if (next == NULL)
10050 return false;
10052 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10056 sh_insn_length_adjustment (rtx insn)
10058 /* Instructions with unfilled delay slots take up an extra two bytes for
10059 the nop in the delay slot. */
10060 if (((NONJUMP_INSN_P (insn)
10061 && GET_CODE (PATTERN (insn)) != USE
10062 && GET_CODE (PATTERN (insn)) != CLOBBER)
10063 || CALL_P (insn) || JUMP_P (insn))
10064 && ! sequence_insn_p (insn)
10065 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10066 return 2;
10068 /* SH2e has a bug that prevents the use of annulled branches, so if
10069 the delay slot is not filled, we'll have to put a NOP in it. */
10070 if (sh_cpu_attr == CPU_SH2E
10071 && JUMP_P (insn)
10072 && get_attr_type (insn) == TYPE_CBRANCH
10073 && ! sequence_insn_p (insn))
10074 return 2;
10076 /* sh-dsp parallel processing insns take four bytes instead of two. */
10078 if (NONJUMP_INSN_P (insn))
10080 int sum = 0;
10081 rtx body = PATTERN (insn);
10082 const char *templ;
10083 char c;
10084 bool maybe_label = true;
10086 if (GET_CODE (body) == ASM_INPUT)
10087 templ = XSTR (body, 0);
10088 else if (asm_noperands (body) >= 0)
10089 templ
10090 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10091 else
10092 return 0;
10095 int ppi_adjust = 0;
10098 c = *templ++;
10099 while (c == ' ' || c == '\t');
10100 /* all sh-dsp parallel-processing insns start with p.
10101 The only non-ppi sh insn starting with p is pref.
10102 The only ppi starting with pr is prnd. */
10103 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10104 ppi_adjust = 2;
10105 /* The repeat pseudo-insn expands to three insns, a total of
10106 six bytes in size. */
10107 else if ((c == 'r' || c == 'R')
10108 && ! strncasecmp ("epeat", templ, 5))
10109 ppi_adjust = 4;
10110 while (c && c != '\n'
10111 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10113 /* If this is a label, it is obviously not a ppi insn. */
10114 if (c == ':' && maybe_label)
10116 ppi_adjust = 0;
10117 break;
10119 else if (c == '\'' || c == '"')
10120 maybe_label = false;
10121 c = *templ++;
10123 sum += ppi_adjust;
10124 maybe_label = c != ':';
10126 while (c);
10127 return sum;
10129 return 0;
10132 /* Return TRUE for a valid displacement for the REG+disp addressing
10133 with MODE. */
10134 bool
10135 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10136 bool allow_zero)
10138 if (! CONST_INT_P (op))
10139 return false;
10141 if (TARGET_SHMEDIA)
10143 int size;
10145 /* Check if this is the address of an unaligned load / store. */
10146 if (mode == VOIDmode)
10147 return satisfies_constraint_I06 (op);
10149 size = GET_MODE_SIZE (mode);
10150 return (!(INTVAL (op) & (size - 1))
10151 && INTVAL (op) >= -512 * size
10152 && INTVAL (op) < 512 * size);
10154 else
10156 const HOST_WIDE_INT offset = INTVAL (op);
10157 const int max_disp = max_mov_insn_displacement (mode, consider_sh2a);
10158 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10160 /* If the mode does not support any displacement always return false.
10161 Even though an index of '0' is actually always valid, it will cause
10162 troubles when e.g. a DFmode move is split into two SFmode moves,
10163 where one SFmode move will have index '0' and the other move will
10164 have index '4'. */
10165 if (!allow_zero && max_disp < 1)
10166 return false;
10168 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10172 /* Recognize an RTL expression that is a valid memory address for
10173 an instruction.
10174 The MODE argument is the machine mode for the MEM expression
10175 that wants to use this address.
10176 Allow REG
10177 REG+disp
10178 REG+r0
10179 REG++
10180 --REG
10182 GBR+disp */
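/* As an informal illustration (not from this file): in SH assembly syntax
   these forms roughly correspond to operands such as @Rn, @(disp,Rn),
   @(R0,Rn), @Rn+, @-Rn and @(disp,GBR).  */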
10183 static bool
10184 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10186 if (REG_P (x) && REGNO (x) == GBR_REG)
10187 return true;
10189 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10190 return true;
10191 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10192 && ! TARGET_SHMEDIA
10193 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10194 return true;
10195 else if (GET_CODE (x) == PLUS
10196 && (mode != PSImode || reload_completed))
10198 rtx xop0 = XEXP (x, 0);
10199 rtx xop1 = XEXP (x, 1);
10201 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10202 return gbr_displacement (xop1, mode);
10204 if (GET_MODE_SIZE (mode) <= 8
10205 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10206 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10207 return true;
10209 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10210 || ((xop0 == stack_pointer_rtx
10211 || xop0 == hard_frame_pointer_rtx)
10212 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10213 || ((xop1 == stack_pointer_rtx
10214 || xop1 == hard_frame_pointer_rtx)
10215 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10216 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10217 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10218 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10219 && TARGET_FMOVD && mode == DFmode)))
10221 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10222 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10223 return true;
10224 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10225 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10226 return true;
10230 return false;
10233 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10234 isn't protected by a PIC unspec. */
10235 bool
10236 nonpic_symbol_mentioned_p (rtx x)
10238 const char *fmt;
10239 int i;
10241 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10242 || GET_CODE (x) == PC)
10243 return true;
10245 /* We don't want to look into the possible MEM location of a
10246 CONST_DOUBLE, since we're not going to use it, in general. */
10247 if (GET_CODE (x) == CONST_DOUBLE)
10248 return false;
10250 if (GET_CODE (x) == UNSPEC
10251 && (XINT (x, 1) == UNSPEC_PIC
10252 || XINT (x, 1) == UNSPEC_GOT
10253 || XINT (x, 1) == UNSPEC_GOTOFF
10254 || XINT (x, 1) == UNSPEC_GOTPLT
10255 || XINT (x, 1) == UNSPEC_GOTTPOFF
10256 || XINT (x, 1) == UNSPEC_DTPOFF
10257 || XINT (x, 1) == UNSPEC_TPOFF
10258 || XINT (x, 1) == UNSPEC_PLT
10259 || XINT (x, 1) == UNSPEC_SYMOFF
10260 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10261 return false;
10263 fmt = GET_RTX_FORMAT (GET_CODE (x));
10264 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10266 if (fmt[i] == 'E')
10268 int j;
10269 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10270 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10271 return true;
10273 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10274 return true;
10277 return false;
10280 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10281 @GOTOFF in `reg'. */
10283 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10284 rtx reg)
10286 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10287 return orig;
10289 if (GET_CODE (orig) == LABEL_REF
10290 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10292 if (reg == NULL_RTX)
10293 reg = gen_reg_rtx (Pmode);
10295 emit_insn (gen_symGOTOFF2reg (reg, orig));
10296 return reg;
10298 else if (GET_CODE (orig) == SYMBOL_REF)
10300 if (reg == NULL_RTX)
10301 reg = gen_reg_rtx (Pmode);
10303 emit_insn (gen_symGOT2reg (reg, orig));
10304 return reg;
10306 return orig;
10309 /* Given a (logical) mode size and an offset in bytes, try to find the
10310 appropriate displacement value for a mov insn. On SH the displacements
10311 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10312 15 bytes in QImode. To compensate for this, we create a new base address by
10313 adding an adjustment value to it.
10315 If the originally requested offset is greater than 127 we prefer using
10316 values 124..127 over 128..131 to increase opportunities to use the
10317 add #imm, Rn insn.
10319 In some cases it is possible that a requested offset might seem unaligned
10320 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10321 This is compensated by adjusting the base address so that the effective
10322 address of the displacement move insn will be aligned.
10324 This is not the best possible way of rebasing the base address, as it
10325 does not look at other present displacement addressings around it.
10326 In some cases this can create more base address adjustments than would
10327 actually be necessary. */
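/* A worked example (illustrative only, not from the original source): for an
   SImode access at offset 136 the maximum mov.l displacement is 60, so the
   code below picks offset_adjust = 124 and mov_disp = 12; the base address is
   rebased with an add #124 and the move itself uses a @(12,Rn) operand.  */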
10328 struct disp_adjust
10330 rtx offset_adjust;
10331 rtx mov_disp;
10334 static struct disp_adjust
10335 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10337 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10339 /* Do not try to use SH2A's large displacements here, because this would
10340 effectively disable the small displacement insns. */
10341 const int mode_sz = GET_MODE_SIZE (mode);
10342 const int mov_insn_sz = mov_insn_size (mode, false);
10343 const int max_disp = max_mov_insn_displacement (mode, false);
10344 const int max_disp_next = max_disp + mov_insn_sz;
10345 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10346 HOST_WIDE_INT offset_adjust;
10348 /* In some cases this actually does happen and we must check for it. */
10349 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10350 return res;
10352 /* Keeps the previous behavior for QImode displacement addressing.
10353 This just decides how the offset is re-based. Removing this special
10354 case will result in slightly bigger code on average, but it's not that
10355 bad actually. */
10356 if (mov_insn_sz == 1)
10357 align_modifier = 0;
10359 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10361 if (mode_sz + offset - offset_adjust <= max_disp_next)
10363 res.offset_adjust = GEN_INT (offset_adjust);
10364 res.mov_disp = GEN_INT (offset - offset_adjust);
10367 return res;
10370 /* Try to modify an illegitimate address and make it legitimate.
10371 If we find one, return the new, valid address.
10372 Otherwise, return the original address. */
10373 static rtx
10374 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10376 if (flag_pic)
10377 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10379 if (TARGET_SHMEDIA)
10380 return x;
10382 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10383 || (TARGET_SH2E && mode == SFmode))
10384 return x;
10386 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10387 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10389 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10390 INTVAL (XEXP (x, 1)));
10392 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10394 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10395 adj.offset_adjust, NULL_RTX, 0,
10396 OPTAB_LIB_WIDEN);
10397 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10401 return x;
10404 /* Attempt to replace *p, which is an address that needs reloading, with
10405 a valid memory address for an operand of mode MODE.
10406 Like for sh_legitimize_address, for the SH we try to get a normal form
10407 of the address. That will allow inheritance of the address reloads. */
10408 bool
10409 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10410 int itype)
10412 enum reload_type type = (enum reload_type) itype;
10413 const int mode_sz = GET_MODE_SIZE (mode);
10415 if (TARGET_SHMEDIA)
10416 return false;
10418 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10419 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10420 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10421 && (ALLOW_INDEXED_ADDRESS
10422 || XEXP (*p, 0) == stack_pointer_rtx
10423 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10425 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10426 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10428 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10430 push_reload (*p, NULL_RTX, p, NULL,
10431 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10432 return true;
10435 if (TARGET_SH2E && mode == SFmode)
10437 *p = copy_rtx (*p);
10438 push_reload (*p, NULL_RTX, p, NULL,
10439 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10440 return true;
10443 /* FIXME: Do not allow to legitimize QImode and HImode displacement
10444 moves because then reload has a problem figuring the constraint
10445 that the move insn target/source reg must be R0.
10446 Or maybe some handling is wrong in sh_secondary_reload for this
10447 to work properly? */
10448 if ((mode_sz == 4 || mode_sz == 8)
10449 && ! (TARGET_SH4 && mode == DFmode)
10450 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10452 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10453 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10454 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10455 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10456 return true;
10460 /* We must re-recognize what we created before. */
10461 if (GET_CODE (*p) == PLUS
10462 && (mode_sz == 4 || mode_sz == 8)
10463 && GET_CODE (XEXP (*p, 0)) == PLUS
10464 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10465 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10466 && CONST_INT_P (XEXP (*p, 1))
10467 && ! (TARGET_SH2E && mode == SFmode))
10469 /* Because this address is so complex, we know it must have
10470 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10471 it is already unshared, and needs no further unsharing. */
10472 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10473 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10474 return true;
10477 return false;
10480 /* In the name of slightly smaller debug output, and to cater to
10481 general assembler lossage, recognize various UNSPEC sequences
10482 and turn them back into a direct symbol reference. */
10483 static rtx
10484 sh_delegitimize_address (rtx orig_x)
10486 rtx x, y;
10488 orig_x = delegitimize_mem_from_attrs (orig_x);
10490 x = orig_x;
10491 if (MEM_P (x))
10492 x = XEXP (x, 0);
10493 if (GET_CODE (x) == CONST)
10495 y = XEXP (x, 0);
10496 if (GET_CODE (y) == UNSPEC)
10498 if (XINT (y, 1) == UNSPEC_GOT
10499 || XINT (y, 1) == UNSPEC_GOTOFF
10500 || XINT (y, 1) == UNSPEC_SYMOFF)
10501 return XVECEXP (y, 0, 0);
10502 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10504 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10506 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10508 if (GET_CODE (symplt) == UNSPEC
10509 && XINT (symplt, 1) == UNSPEC_PLT)
10510 return XVECEXP (symplt, 0, 0);
10513 else if (TARGET_SHMEDIA
10514 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10515 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10517 rtx offset = XVECEXP (y, 0, 1);
10519 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10520 if (MEM_P (orig_x))
10521 x = replace_equiv_address_nv (orig_x, x);
10522 return x;
10527 return orig_x;
10530 /* Mark the use of a constant in the literal table. If the constant
10531 has multiple labels, make it unique. */
10532 static rtx
10533 mark_constant_pool_use (rtx x)
10535 rtx insn, lab, pattern;
10537 if (x == NULL_RTX)
10538 return x;
10540 switch (GET_CODE (x))
10542 case LABEL_REF:
10543 x = XEXP (x, 0);
10544 case CODE_LABEL:
10545 break;
10546 default:
10547 return x;
10550 /* Get the first label in the list of labels for the same constant
10551 and delete the other labels in the list. */
10552 lab = x;
10553 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
10555 if (!LABEL_P (insn)
10556 || LABEL_REFS (insn) != NEXT_INSN (insn))
10557 break;
10558 lab = insn;
10561 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10562 INSN_DELETED_P (insn) = 1;
10564 /* Mark constants in a window. */
10565 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
10567 if (!NONJUMP_INSN_P (insn))
10568 continue;
10570 pattern = PATTERN (insn);
10571 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10572 continue;
10574 switch (XINT (pattern, 1))
10576 case UNSPECV_CONST2:
10577 case UNSPECV_CONST4:
10578 case UNSPECV_CONST8:
10579 XVECEXP (pattern, 0, 1) = const1_rtx;
10580 break;
10581 case UNSPECV_WINDOW_END:
10582 if (XVECEXP (pattern, 0, 0) == x)
10583 return lab;
10584 break;
10585 case UNSPECV_CONST_END:
10586 return lab;
10587 default:
10588 break;
10592 return lab;
10595 /* Return true if it's possible to redirect BRANCH1 to the destination
10596 of an unconditional jump BRANCH2. We only want to do this if the
10597 resulting branch will have a short displacement. */
10598 bool
10599 sh_can_redirect_branch (rtx branch1, rtx branch2)
10601 if (flag_expensive_optimizations && simplejump_p (branch2))
10603 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10604 rtx insn;
10605 int distance;
10607 for (distance = 0, insn = NEXT_INSN (branch1);
10608 insn && distance < 256;
10609 insn = PREV_INSN (insn))
10611 if (insn == dest)
10612 return true;
10613 else
10614 distance += get_attr_length (insn);
10616 for (distance = 0, insn = NEXT_INSN (branch1);
10617 insn && distance < 256;
10618 insn = NEXT_INSN (insn))
10620 if (insn == dest)
10621 return true;
10622 else
10623 distance += get_attr_length (insn);
10626 return false;
10629 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10630 bool
10631 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10632 unsigned int new_reg)
10634 /* Interrupt functions can only use registers that have already been
10635 saved by the prologue, even if they would normally be
10636 call-clobbered. */
10637 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10638 return false;
10640 return true;
10643 /* Function to update the integer COST
10644 based on the relationship between INSN that is dependent on
10645 DEP_INSN through the dependence LINK. The default is to make no
10646 adjustment to COST. This can be used for example to specify to
10647 the scheduler that an output- or anti-dependence does not incur
10648 the same cost as a data-dependence. The return value should be
10649 the new value for COST. */
10650 static int
10651 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
10653 rtx reg, use_pat;
10655 if (TARGET_SHMEDIA)
10657 /* On SHmedia, if the dependence is an anti-dependence or
10658 output-dependence, there is no cost. */
10659 if (REG_NOTE_KIND (link) != 0)
10661 /* However, dependencies between target register loads and
10662 uses of the register in a subsequent block that are separated
10663 by a conditional branch are not modelled - we have to make do with
10664 the anti-dependency between the target register load and the
10665 conditional branch that ends the current block. */
10666 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10667 && GET_CODE (PATTERN (dep_insn)) == SET
10668 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10669 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10670 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10672 int orig_cost = cost;
10673 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10674 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10675 ? insn : JUMP_LABEL (insn));
10676 /* On the likely path, the branch costs 1, on the unlikely path,
10677 it costs 3. */
10678 cost--;
10680 target = next_active_insn (target);
10681 while (target && ! flow_dependent_p (target, dep_insn)
10682 && --cost > 0);
10683 /* If two branches are executed in immediate succession, with the
10684 first branch properly predicted, this causes a stall at the
10685 second branch, hence we won't need the target for the
10686 second branch for two cycles after the launch of the first
10687 branch. */
10688 if (cost > orig_cost - 2)
10689 cost = orig_cost - 2;
10691 else
10692 cost = 0;
10695 else if (get_attr_is_mac_media (insn)
10696 && get_attr_is_mac_media (dep_insn))
10697 cost = 1;
10699 else if (! reload_completed
10700 && GET_CODE (PATTERN (insn)) == SET
10701 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10702 && GET_CODE (PATTERN (dep_insn)) == SET
10703 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10704 && cost < 4)
10705 cost = 4;
10706 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10707 that is needed at the target. */
10708 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10709 && ! flow_dependent_p (insn, dep_insn))
10710 cost--;
10712 else if (REG_NOTE_KIND (link) == 0)
10714 enum attr_type type;
10715 rtx dep_set;
10717 if (recog_memoized (insn) < 0
10718 || recog_memoized (dep_insn) < 0)
10719 return cost;
10721 dep_set = single_set (dep_insn);
10723 /* The latency that we specify in the scheduling description refers
10724 to the actual output, not to an auto-increment register; for that,
10725 the latency is one. */
10726 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10728 rtx set = single_set (insn);
10730 if (set
10731 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10732 && (!MEM_P (SET_DEST (set))
10733 || !reg_mentioned_p (SET_DEST (dep_set),
10734 XEXP (SET_DEST (set), 0))))
10735 cost = 1;
10737 /* The only input for a call that is timing-critical is the
10738 function's address. */
10739 if (CALL_P (insn))
10741 rtx call = get_call_rtx_from (insn);
10742 if (call
10743 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10744 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10745 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10746 cost -= TARGET_SH4_300 ? 3 : 6;
10748 /* Likewise, the most timing critical input for an sfuncs call
10749 is the function address. However, sfuncs typically start
10750 using their arguments pretty quickly.
10751 Assume a four cycle delay for SH4 before they are needed.
10752 Cached ST40-300 calls are quicker, so assume only a one
10753 cycle delay there.
10754 ??? Maybe we should encode the delays till input registers
10755 are needed by sfuncs into the sfunc call insn. */
10756 /* All sfunc calls are parallels with at least four components.
10757 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10758 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10759 && XVECLEN (PATTERN (insn), 0) >= 4
10760 && (reg = sfunc_uses_reg (insn)))
10762 if (! reg_set_p (reg, dep_insn))
10763 cost -= TARGET_SH4_300 ? 1 : 4;
10765 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10767 enum attr_type dep_type = get_attr_type (dep_insn);
10769 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10770 cost--;
10771 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10772 && (type = get_attr_type (insn)) != TYPE_CALL
10773 && type != TYPE_SFUNC)
10774 cost--;
10775 /* When the preceding instruction loads the shift amount of
10776 the following SHAD/SHLD, the latency of the load is increased
10777 by 1 cycle. */
10778 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10779 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10780 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10781 XEXP (SET_SRC (single_set (insn)),
10782 1)))
10783 cost++;
10784 /* When an LS group instruction with a latency of less than
10785 3 cycles is followed by a double-precision floating-point
10786 instruction, FIPR, or FTRV, the latency of the first
10787 instruction is increased to 3 cycles. */
10788 else if (cost < 3
10789 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10790 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10791 cost = 3;
10792 /* The lsw register of a double-precision computation is ready one
10793 cycle earlier. */
10794 else if (reload_completed
10795 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10796 && (use_pat = single_set (insn))
10797 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10798 SET_SRC (use_pat)))
10799 cost -= 1;
10801 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10802 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10803 cost -= 1;
10805 else if (TARGET_SH4_300)
10807 /* Stores need their input register two cycles later. */
10808 if (dep_set && cost >= 1
10809 && ((type = get_attr_type (insn)) == TYPE_STORE
10810 || type == TYPE_PSTORE
10811 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10813 rtx set = single_set (insn);
10815 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10816 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10818 cost -= 2;
10819 /* But don't reduce the cost below 1 if the address depends
10820 on a side effect of dep_insn. */
10821 if (cost < 1
10822 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10823 cost = 1;
10828 /* An anti-dependence penalty of two applies if the first insn is a double
10829 precision fadd / fsub / fmul. */
10830 else if (!TARGET_SH4_300
10831 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10832 && recog_memoized (dep_insn) >= 0
10833 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10834 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10835 /* A lot of alleged anti-flow dependences are fake,
10836 so check this one is real. */
10837 && flow_dependent_p (dep_insn, insn))
10838 cost = 2;
10840 return cost;
10843 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10844 if DEP_INSN is anti-flow dependent on INSN. */
10845 static bool
10846 flow_dependent_p (rtx insn, rtx dep_insn)
10848 rtx tmp = PATTERN (insn);
10850 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10851 return tmp == NULL_RTX;
10854 /* A helper function for flow_dependent_p called through note_stores. */
10855 static void
10856 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
10858 rtx * pinsn = (rtx *) data;
10860 if (*pinsn && reg_referenced_p (x, *pinsn))
10861 *pinsn = NULL_RTX;
10864 /* For use by sh_allocate_initial_value. Note that sh.md contains some
10865 'special function' patterns (type sfunc) that clobber pr, but that
10866 do not look like function calls to leaf_function_p. Hence we must
10867 do this extra check. */
10868 static int
10869 sh_pr_n_sets (void)
10871 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10874 /* Return where to allocate pseudo for a given hard register initial
10875 value. */
10876 static rtx
10877 sh_allocate_initial_value (rtx hard_reg)
10879 rtx x;
10881 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
10883 if (crtl->is_leaf
10884 && ! sh_pr_n_sets ()
10885 && ! (TARGET_SHCOMPACT
10886 && ((crtl->args.info.call_cookie
10887 & ~ CALL_COOKIE_RET_TRAMP (1))
10888 || crtl->saves_all_registers)))
10889 x = hard_reg;
10890 else
10891 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
10893 else
10894 x = NULL_RTX;
10896 return x;
10899 /* This function returns "2" to indicate dual issue for the SH4
10900 processor. To be used by the DFA pipeline description. */
10901 static int
10902 sh_issue_rate (void)
10904 if (TARGET_SUPERSCALAR)
10905 return 2;
10906 else
10907 return 1;
10910 /* Functions for ready queue reordering for sched1. */
10912 /* Get weight for mode for a set x. */
10913 static short
10914 find_set_regmode_weight (rtx x, enum machine_mode mode)
10916 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
10917 return 1;
10918 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
10920 if (REG_P (SET_DEST (x)))
10922 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
10923 return 1;
10924 else
10925 return 0;
10927 return 1;
10929 return 0;
10932 /* Get regmode weight for insn. */
10933 static short
10934 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
10936 short reg_weight = 0;
10937 rtx x;
10939 /* Increment weight for each register born here. */
10940 x = PATTERN (insn);
10941 reg_weight += find_set_regmode_weight (x, mode);
10942 if (GET_CODE (x) == PARALLEL)
10944 int j;
10945 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
10947 x = XVECEXP (PATTERN (insn), 0, j);
10948 reg_weight += find_set_regmode_weight (x, mode);
10951 /* Decrement weight for each register that dies here. */
10952 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
10954 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
10956 rtx note = XEXP (x, 0);
10957 if (REG_P (note) && GET_MODE (note) == mode)
10958 reg_weight--;
10961 return reg_weight;
10964 /* Calculate regmode weights for all insns of a basic block. */
10965 static void
10966 find_regmode_weight (basic_block b, enum machine_mode mode)
10968 rtx insn, next_tail, head, tail;
10970 get_ebb_head_tail (b, b, &head, &tail);
10971 next_tail = NEXT_INSN (tail);
10973 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
10975 /* Handle register life information. */
10976 if (!INSN_P (insn))
10977 continue;
10979 if (mode == SFmode)
10980 INSN_REGMODE_WEIGHT (insn, mode) =
10981 find_insn_regmode_weight (insn, mode)
10982 + 2 * find_insn_regmode_weight (insn, DFmode);
10983 else if (mode == SImode)
10984 INSN_REGMODE_WEIGHT (insn, mode) =
10985 find_insn_regmode_weight (insn, mode)
10986 + 2 * find_insn_regmode_weight (insn, DImode);
10990 /* Comparison function for ready queue sorting. */
10991 static int
10992 rank_for_reorder (const void *x, const void *y)
10994 rtx tmp = *(const rtx *) y;
10995 rtx tmp2 = *(const rtx *) x;
10997 /* The insn in a schedule group should be issued first. */
10998 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
10999 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11001 /* If insns are equally good, sort by INSN_LUID (original insn order). This
11002 minimizes instruction movement, thus minimizing sched's effect on
11003 register pressure. */
11004 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11007 /* Resort the array A in which only the element at index N may be out of order. */
11008 static void
11009 swap_reorder (rtx *a, int n)
11011 rtx insn = a[n - 1];
11012 int i = n - 2;
11014 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11016 a[i + 1] = a[i];
11017 i -= 1;
11019 a[i + 1] = insn;
11022 /* Sort the ready list by ascending priority. */
11023 static void
11024 ready_reorder (rtx *ready, int nready)
11026 if (nready == 2)
11027 swap_reorder (ready, nready);
11028 else if (nready > 2)
11029 qsort (ready, nready, sizeof (rtx), rank_for_reorder);
11032 /* Count life regions of r0 for a block. */
11033 static int
11034 find_r0_life_regions (basic_block b)
11036 rtx end, insn;
11037 rtx pset;
11038 rtx r0_reg;
11039 int live;
11040 int set;
11041 int death = 0;
11043 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11045 set = 1;
11046 live = 1;
11048 else
11050 set = 0;
11051 live = 0;
11054 insn = BB_HEAD (b);
11055 end = BB_END (b);
11056 r0_reg = gen_rtx_REG (SImode, R0_REG);
11057 while (1)
11059 if (INSN_P (insn))
11061 if (find_regno_note (insn, REG_DEAD, R0_REG))
11063 death++;
11064 live = 0;
11066 if (!live
11067 && (pset = single_set (insn))
11068 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11069 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11071 set++;
11072 live = 1;
11075 if (insn == end)
11076 break;
11077 insn = NEXT_INSN (insn);
11079 return set - death;
11082 /* Calculate regmode weights for all insns of all basic blocks. */
11083 static void
11084 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11085 int verbose ATTRIBUTE_UNUSED,
11086 int old_max_uid)
11088 basic_block b;
11090 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11091 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11092 r0_life_regions = 0;
11094 FOR_EACH_BB_REVERSE (b)
11096 find_regmode_weight (b, SImode);
11097 find_regmode_weight (b, SFmode);
11098 if (!reload_completed)
11099 r0_life_regions += find_r0_life_regions (b);
11102 CURR_REGMODE_PRESSURE (SImode) = 0;
11103 CURR_REGMODE_PRESSURE (SFmode) = 0;
11106 /* Cleanup. */
11107 static void
11108 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11109 int verbose ATTRIBUTE_UNUSED)
11111 if (regmode_weight[0])
11113 free (regmode_weight[0]);
11114 regmode_weight[0] = NULL;
11116 if (regmode_weight[1])
11118 free (regmode_weight[1]);
11119 regmode_weight[1] = NULL;
11123 /* The scalar modes supported differ from the default version in TImode
11124 for 32-bit SHMEDIA. */
11125 static bool
11126 sh_scalar_mode_supported_p (enum machine_mode mode)
11128 if (TARGET_SHMEDIA32 && mode == TImode)
11129 return false;
11131 return default_scalar_mode_supported_p (mode);
11134 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11135 keep count of register pressures on SImode and SFmode. */
11136 static int
11137 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11138 int sched_verbose ATTRIBUTE_UNUSED,
11139 rtx insn,
11140 int can_issue_more)
11142 if (GET_CODE (PATTERN (insn)) != USE
11143 && GET_CODE (PATTERN (insn)) != CLOBBER)
11144 cached_can_issue_more = can_issue_more - 1;
11145 else
11146 cached_can_issue_more = can_issue_more;
11148 if (reload_completed)
11149 return cached_can_issue_more;
11151 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11152 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11154 return cached_can_issue_more;
11157 static void
11158 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11159 int verbose ATTRIBUTE_UNUSED,
11160 int veclen ATTRIBUTE_UNUSED)
11162 CURR_REGMODE_PRESSURE (SImode) = 0;
11163 CURR_REGMODE_PRESSURE (SFmode) = 0;
11166 /* Some magic numbers. */
11167 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11168 functions that already have high pressure on r0. */
11169 #define R0_MAX_LIFE_REGIONS 2
11170 /* Register Pressure thresholds for SImode and SFmode registers. */
11171 #define SIMODE_MAX_WEIGHT 5
11172 #define SFMODE_MAX_WEIGHT 10
11174 /* Return true if the pressure is high for MODE. */
11175 static bool
11176 high_pressure (enum machine_mode mode)
11178 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11179 functions that already have high pressure on r0. */
11180 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11181 return true;
11183 if (mode == SFmode)
11184 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11185 else
11186 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11189 /* Reorder ready queue if register pressure is high. */
11190 static int
11191 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11192 int sched_verbose ATTRIBUTE_UNUSED,
11193 rtx *ready,
11194 int *n_readyp,
11195 int clock_var ATTRIBUTE_UNUSED)
11197 if (reload_completed)
11198 return sh_issue_rate ();
11200 if (high_pressure (SFmode) || high_pressure (SImode))
11202 ready_reorder (ready, *n_readyp);
11205 return sh_issue_rate ();
11208 /* Skip cycles if the current register pressure is high. */
11209 static int
11210 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11211 int sched_verbose ATTRIBUTE_UNUSED,
11212 rtx *ready ATTRIBUTE_UNUSED,
11213 int *n_readyp ATTRIBUTE_UNUSED,
11214 int clock_var ATTRIBUTE_UNUSED)
11216 if (reload_completed)
11217 return cached_can_issue_more;
11219 if (high_pressure(SFmode) || high_pressure (SImode))
11220 skip_cycles = 1;
11222 return cached_can_issue_more;
11225 /* Skip cycles without sorting the ready queue. This will move insns from
11226 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
11227 queue by sh_reorder. */
11229 /* Generally, skipping this many cycles is sufficient for all insns to move
11230 from Q -> R. */
11231 #define MAX_SKIPS 8
11233 static int
11234 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11235 int sched_verbose ATTRIBUTE_UNUSED,
11236 rtx insn ATTRIBUTE_UNUSED,
11237 int last_clock_var,
11238 int clock_var,
11239 int *sort_p)
11241 if (reload_completed)
11242 return 0;
11244 if (skip_cycles)
11246 if ((clock_var - last_clock_var) < MAX_SKIPS)
11248 *sort_p = 0;
11249 return 1;
11251 /* If this is the last cycle we are skipping, allow reordering of R. */
11252 if ((clock_var - last_clock_var) == MAX_SKIPS)
11254 *sort_p = 1;
11255 return 1;
11259 skip_cycles = 0;
11261 return 0;
11264 /* SHmedia requires registers for branches, so we can't generate new
11265 branches past reload. */
11266 static bool
11267 sh_cannot_modify_jumps_p (void)
11269 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
11272 static reg_class_t
11273 sh_target_reg_class (void)
11275 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11278 static bool
11279 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11281 if (! shmedia_space_reserved_for_target_registers)
11282 return 0;
11283 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11284 return 0;
11286 HARD_REG_SET dummy;
11287 if (calc_live_regs (&dummy) >= 6 * 8)
11288 return 1;
11289 return 0;
11292 static bool
11293 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11295 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11299 On the SH1..SH4, the trampoline looks like
11300 2 0002 D202 mov.l l2,r2
11301 1 0000 D301 mov.l l1,r3
11302 3 0004 422B jmp @r2
11303 4 0006 0009 nop
11304 5 0008 00000000 l1: .long area
11305 6 000c 00000000 l2: .long function
11307 SH5 (compact) uses r1 instead of r3 for the static chain. */
11310 /* Emit RTL insns to initialize the variable parts of a trampoline.
11311 FNADDR is an RTX for the address of the function's pure code.
11312 CXT is an RTX for the static chain value for the function. */
11313 static void
11314 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11316 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11317 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11319 if (TARGET_SHMEDIA64)
11321 rtx tramp_templ;
11322 int fixed_len;
11324 rtx movi1 = GEN_INT (0xcc000010);
11325 rtx shori1 = GEN_INT (0xc8000010);
11326 rtx src, dst;
11328 /* The following trampoline works within a +- 128 KB range for cxt:
11329 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11330 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11331 gettr tr1,r1; blink tr0,r63 */
11332 /* Address rounding makes it hard to compute the exact bounds of the
11333 offset for this trampoline, but we have a rather generous offset
11334 range, so frame_offset should do fine as an upper bound. */
11335 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11337 /* ??? could optimize this trampoline initialization
11338 by writing DImode words with two insns each. */
11339 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11340 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11341 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11342 insn = gen_rtx_AND (DImode, insn, mask);
11343 /* Or in ptb/u .,tr1 pattern */
11344 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11345 insn = force_operand (insn, NULL_RTX);
11346 insn = gen_lowpart (SImode, insn);
11347 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11348 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11349 insn = gen_rtx_AND (DImode, insn, mask);
11350 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11351 insn = gen_lowpart (SImode, insn);
11352 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11353 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11354 insn = gen_rtx_AND (DImode, insn, mask);
11355 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11356 insn = gen_lowpart (SImode, insn);
11357 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11358 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11359 insn = gen_rtx_AND (DImode, insn, mask);
11360 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11361 insn = gen_lowpart (SImode, insn);
11362 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11363 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11364 insn = gen_rtx_AND (DImode, insn, mask);
11365 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11366 insn = gen_lowpart (SImode, insn);
11367 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11368 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11369 GEN_INT (0x6bf10600));
11370 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11371 GEN_INT (0x4415fc10));
11372 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11373 GEN_INT (0x4401fff0));
11374 emit_insn (gen_ic_invalidate_line (tramp));
11375 return;
11377 tramp_templ = gen_rtx_SYMBOL_REF (Pmode, "__GCC_nested_trampoline");
11378 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11380 tramp_templ = gen_datalabel_ref (tramp_templ);
11381 dst = tramp_mem;
11382 src = gen_const_mem (BLKmode, tramp_templ);
11383 set_mem_align (dst, 256);
11384 set_mem_align (src, 64);
11385 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11387 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11388 emit_move_insn (adjust_address (tramp_mem, Pmode,
11389 fixed_len + GET_MODE_SIZE (Pmode)),
11390 cxt);
11391 emit_insn (gen_ic_invalidate_line (tramp));
11392 return;
11394 else if (TARGET_SHMEDIA)
11396 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11397 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11398 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11399 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11400 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
11401 rotated right by 10, and the high 16 bits of every 32 selected. */
11402 rtx movishori
11403 = force_reg (V2HImode, (simplify_gen_subreg
11404 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11405 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11406 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11408 fnaddr = force_reg (SImode, fnaddr);
11409 cxt = force_reg (SImode, cxt);
11410 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11411 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11412 movishori));
11413 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11414 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11415 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11416 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11417 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11418 gen_rtx_SUBREG (V2HImode, cxt, 0),
11419 movishori));
11420 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11421 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11422 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11423 if (TARGET_LITTLE_ENDIAN)
11425 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11426 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11428 else
11430 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11431 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11433 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11434 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11435 emit_insn (gen_ic_invalidate_line (tramp));
11436 return;
11438 else if (TARGET_SHCOMPACT)
11440 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11441 return;
11443 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11444 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11445 SImode));
11446 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11447 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11448 SImode));
11449 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11450 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11451 if (TARGET_HARD_SH4 || TARGET_SH5)
11453 if (!TARGET_INLINE_IC_INVALIDATE
11454 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11455 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11456 FUNCTION_ORDINARY),
11457 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11458 else
11459 emit_insn (gen_ic_invalidate_line (tramp));
11463 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11464 static rtx
11465 sh_trampoline_adjust_address (rtx tramp)
11467 if (TARGET_SHMEDIA)
11468 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11469 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11470 return tramp;
11473 /* FIXME: This is overly conservative. A SHcompact function that
11474 receives arguments ``by reference'' will have them stored in its
11475 own stack frame, so it must not pass pointers or references to
11476 these arguments to other functions by means of sibling calls. */
11477 /* If PIC, we cannot make sibling calls to global functions
11478 because the PLT requires r12 to be live. */
11479 static bool
11480 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11482 return (1
11483 && (! TARGET_SHCOMPACT
11484 || crtl->args.info.stack_regs == 0)
11485 && ! sh_cfun_interrupt_handler_p ()
11486 && (! flag_pic
11487 || (decl && ! TREE_PUBLIC (decl))
11488 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11491 /* Machine specific built-in functions. */
11493 struct builtin_description
11495 bool (* const is_enabled) (void);
11496 const enum insn_code icode;
11497 const char *const name;
11498 int signature;
11499 tree fndecl;
11502 static bool
11503 shmedia_builtin_p (void)
11505 return TARGET_SHMEDIA;
11508 /* This function can be used if there are any built-ins that are not for
11509 SHmedia. It's commented out to avoid the defined-but-unused warning.
11510 static bool
11511 sh1_builtin_p (void)
11513 return TARGET_SH1;
11517 /* Describe the number and signedness of arguments; arg[0] == result
11518 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11519 /* 9: 64-bit pointer, 10: 32-bit pointer */
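/* For example, the SH_BLTIN_MAC_HISI entry below, { 1, 4, 4, 1 }, describes
   a builtin whose result is unsigned, whose first two arguments take the
   signedness implied by the insn's operand modes, and whose last argument is
   unsigned; a 0 in the first slot (as in SH_BLTIN_PSSV) means the builtin
   returns void.  */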
11520 static const char signature_args[][4] =
11522 #define SH_BLTIN_V2SI2 0
11523 { 4, 4 },
11524 #define SH_BLTIN_V4HI2 1
11525 { 4, 4 },
11526 #define SH_BLTIN_V2SI3 2
11527 { 4, 4, 4 },
11528 #define SH_BLTIN_V4HI3 3
11529 { 4, 4, 4 },
11530 #define SH_BLTIN_V8QI3 4
11531 { 4, 4, 4 },
11532 #define SH_BLTIN_MAC_HISI 5
11533 { 1, 4, 4, 1 },
11534 #define SH_BLTIN_SH_HI 6
11535 { 4, 4, 1 },
11536 #define SH_BLTIN_SH_SI 7
11537 { 4, 4, 1 },
11538 #define SH_BLTIN_V4HI2V2SI 8
11539 { 4, 4, 4 },
11540 #define SH_BLTIN_V4HI2V8QI 9
11541 { 4, 4, 4 },
11542 #define SH_BLTIN_SISF 10
11543 { 4, 2 },
11544 #define SH_BLTIN_LDUA_L 11
11545 { 2, 10 },
11546 #define SH_BLTIN_LDUA_Q 12
11547 { 1, 10 },
11548 #define SH_BLTIN_STUA_L 13
11549 { 0, 10, 2 },
11550 #define SH_BLTIN_STUA_Q 14
11551 { 0, 10, 1 },
11552 #define SH_BLTIN_LDUA_L64 15
11553 { 2, 9 },
11554 #define SH_BLTIN_LDUA_Q64 16
11555 { 1, 9 },
11556 #define SH_BLTIN_STUA_L64 17
11557 { 0, 9, 2 },
11558 #define SH_BLTIN_STUA_Q64 18
11559 { 0, 9, 1 },
11560 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11561 #define SH_BLTIN_2 19
11562 #define SH_BLTIN_SU 19
11563 { 1, 2 },
11564 #define SH_BLTIN_3 20
11565 #define SH_BLTIN_SUS 20
11566 { 2, 2, 1 },
11567 #define SH_BLTIN_PSSV 21
11568 { 0, 8, 2, 2 },
11569 #define SH_BLTIN_XXUU 22
11570 #define SH_BLTIN_UUUU 22
11571 { 1, 1, 1, 1 },
11572 #define SH_BLTIN_PV 23
11573 { 0, 8 },
11574 #define SH_BLTIN_VP 24
11575 { 8, 0 },
11577 /* mcmv: operands considered unsigned. */
11578 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11579 /* mperm: control value considered unsigned int. */
11580 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11581 /* mshards_q: returns signed short. */
11582 /* nsb: takes long long arg, returns unsigned char. */
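/* For illustration (hypothetical user code, not part of these sources):
   per the note above, the NSB builtin listed below takes a long long and
   returns an unsigned char, so a use might look like

     extern long long x;
     unsigned char n = __builtin_sh_media_NSB (x);

   with the exact prototypes being the ones synthesized by sh_init_builtins
   from the signature table.  */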
11583 static struct builtin_description bdesc[] =
11585 { shmedia_builtin_p,
11586 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11587 { shmedia_builtin_p,
11588 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11589 { shmedia_builtin_p,
11590 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11591 { shmedia_builtin_p,
11592 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11593 { shmedia_builtin_p,
11594 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11595 { shmedia_builtin_p,
11596 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11597 { shmedia_builtin_p,
11598 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11599 { shmedia_builtin_p,
11600 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11601 { shmedia_builtin_p,
11602 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11603 { shmedia_builtin_p,
11604 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11605 { shmedia_builtin_p,
11606 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11607 { shmedia_builtin_p,
11608 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11609 { shmedia_builtin_p,
11610 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11611 { shmedia_builtin_p,
11612 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11613 { shmedia_builtin_p,
11614 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11615 { shmedia_builtin_p,
11616 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11617 { shmedia_builtin_p,
11618 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11619 { shmedia_builtin_p,
11620 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11621 { shmedia_builtin_p,
11622 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11623 { shmedia_builtin_p,
11624 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11625 { shmedia_builtin_p,
11626 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11627 { shmedia_builtin_p,
11628 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11629 { shmedia_builtin_p,
11630 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11631 { shmedia_builtin_p,
11632 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11633 { shmedia_builtin_p,
11634 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11635 { shmedia_builtin_p,
11636 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11637 { shmedia_builtin_p,
11638 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11639 { shmedia_builtin_p,
11640 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11641 { shmedia_builtin_p,
11642 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11643 { shmedia_builtin_p,
11644 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11645 { shmedia_builtin_p,
11646 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11647 { shmedia_builtin_p,
11648 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11649 { shmedia_builtin_p,
11650 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11651 { shmedia_builtin_p,
11652 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11653 { shmedia_builtin_p,
11654 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11655 { shmedia_builtin_p,
11656 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11657 { shmedia_builtin_p,
11658 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11659 { shmedia_builtin_p,
11660 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11661 { shmedia_builtin_p,
11662 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11663 { shmedia_builtin_p,
11664 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11665 { shmedia_builtin_p,
11666 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11667 { shmedia_builtin_p,
11668 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11669 { shmedia_builtin_p,
11670 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11671 { shmedia_builtin_p,
11672 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11673 { shmedia_builtin_p,
11674 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11675 { shmedia_builtin_p,
11676 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11677 { shmedia_builtin_p,
11678 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11679 { shmedia_builtin_p,
11680 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11681 { shmedia_builtin_p,
11682 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11683 { shmedia_builtin_p,
11684 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11685 { shmedia_builtin_p,
11686 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11687 { shmedia_builtin_p,
11688 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11689 { shmedia_builtin_p,
11690 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11691 { shmedia_builtin_p,
11692 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11693 { shmedia_builtin_p,
11694 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11695 { shmedia_builtin_p,
11696 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11697 { shmedia_builtin_p,
11698 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11699 { shmedia_builtin_p,
11700 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11701 { shmedia_builtin_p,
11702 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11703 { shmedia_builtin_p,
11704 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11705 { shmedia_builtin_p,
11706 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11707 { shmedia_builtin_p,
11708 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11709 { shmedia_builtin_p,
11710 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11711 { shmedia_builtin_p,
11712 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11713 { shmedia_builtin_p,
11714 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11715 { shmedia_builtin_p,
11716 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11717 { shmedia_builtin_p,
11718 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11719 { shmedia_builtin_p,
11720 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11721 { shmedia_builtin_p,
11722 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11723 { shmedia_builtin_p,
11724 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11725 { shmedia_builtin_p,
11726 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11727 { shmedia_builtin_p,
11728 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11729 { shmedia_builtin_p,
11730 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11731 { shmedia_builtin_p,
11732 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11733 { shmedia_builtin_p,
11734 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11735 { shmedia_builtin_p,
11736 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11737 { shmedia_builtin_p,
11738 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11739 { shmedia_builtin_p,
11740 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11741 { shmedia_builtin_p,
11742 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11743 { shmedia_builtin_p,
11744 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11745 { shmedia_builtin_p,
11746 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11747 { shmedia_builtin_p,
11748 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11749 { shmedia_builtin_p,
11750 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
11753 static void
11754 sh_init_builtins (void)
11756 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11757 memset (shared, 0, sizeof shared);
11759 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11761 builtin_description* d = &bdesc[di];
11763 if (!d->is_enabled ())
11764 continue;
11766 tree type, arg_type = NULL_TREE;
11767 int signature = d->signature;
11769 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11770 type = shared[signature];
11771 else
11773 int has_result = signature_args[signature][0] != 0;
11774 tree args[3];
11776 if ((signature_args[signature][1] & 8)
11777 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11778 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11779 continue;
11780 if (! TARGET_FPU_ANY
11781 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11782 continue;
11783 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11784 args[i] = NULL_TREE;
11785 for (int i = 3; ; i--)
11787 int arg = signature_args[signature][i];
11788 int opno = i - 1 + has_result;
11790 if (arg & 8)
11791 arg_type = ptr_type_node;
11792 else if (arg)
11793 arg_type = (*lang_hooks.types.type_for_mode)
11794 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11795 else if (i)
11796 continue;
11797 else
11798 arg_type = void_type_node;
11799 if (i == 0)
11800 break;
11801 args[i-1] = arg_type;
11803 type = build_function_type_list (arg_type, args[0], args[1],
11804 args[2], NULL_TREE);
11805 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11806 shared[signature] = type;
11808 d->fndecl =
11809 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11810 NULL, NULL_TREE);
11814 /* Implements target hook vector_mode_supported_p. */
11815 bool
11816 sh_vector_mode_supported_p (enum machine_mode mode)
11818 if (TARGET_FPU_ANY
11819 && ((mode == V2SFmode)
11820 || (mode == V4SFmode)
11821 || (mode == V16SFmode)))
11822 return true;
11824 else if (TARGET_SHMEDIA
11825 && ((mode == V8QImode)
11826 || (mode == V2HImode)
11827 || (mode == V4HImode)
11828 || (mode == V2SImode)))
11829 return true;
11831 return false;
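/* For illustration (hypothetical user-level typedefs, not part of these
   sources), the generic vector extension maps onto the modes accepted
   above roughly as follows:

     typedef float v2sf __attribute__ ((vector_size (8)));     V2SFmode
     typedef float v4sf __attribute__ ((vector_size (16)));    V4SFmode
     typedef short v4hi __attribute__ ((vector_size (8)));     V4HImode (SHmedia)

   so such types can live in the corresponding hardware registers when the
   mode is reported as supported.  */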
11834 bool
11835 sh_frame_pointer_required (void)
11837 /* If needed, override this in other tm.h files to cope with various OS
11838 lossage requiring a frame pointer. */
11839 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11840 return true;
11842 if (crtl->profile)
11843 return true;
11845 return false;
11848 /* Implements target hook dwarf_calling_convention. Return an enum
11849 of dwarf_calling_convention. */
11851 sh_dwarf_calling_convention (const_tree func)
11853 if (sh_attr_renesas_p (func))
11854 return DW_CC_GNU_renesas_sh;
11856 return DW_CC_normal;
11859 /* Returns the sh builtin decl for CODE. */
11860 static tree
11861 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11863 if (code >= ARRAY_SIZE (bdesc))
11864 return error_mark_node;
11866 if (!bdesc[code].is_enabled ())
11867 return error_mark_node;
11869 return bdesc[code].fndecl;
11872 /* Expand an expression EXP that calls a built-in function,
11873 with result going to TARGET if that's convenient
11874 (and in mode MODE if that's convenient).
11875 SUBTARGET may be used as the target for computing one of EXP's operands.
11876 IGNORE is nonzero if the value is to be ignored. */
11877 static rtx
11878 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
11879 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
11881 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11882 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
11883 const struct builtin_description *d = &bdesc[fcode];
11884 enum insn_code icode = d->icode;
11885 int signature = d->signature;
11886 int nop = 0;
11887 rtx op[4];
11889 if (signature_args[signature][0])
11891 if (ignore)
11892 return NULL_RTX;
11894 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11895 if (! target || GET_MODE (target) != tmode
11896 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11897 target = gen_reg_rtx (tmode);
11898 op[nop++] = target;
11900 else
11901 target = NULL_RTX;
11903 for (int i = 1; i <= 3; i++, nop++)
11905 tree arg;
11906 enum machine_mode opmode, argmode;
11907 tree optype;
11909 if (! signature_args[signature][i])
11910 break;
11911 arg = CALL_EXPR_ARG (exp, i - 1);
11912 if (arg == error_mark_node)
11913 return const0_rtx;
11914 if (signature_args[signature][i] & 8)
11916 opmode = ptr_mode;
11917 optype = ptr_type_node;
11919 else
11921 opmode = insn_data[icode].operand[nop].mode;
11922 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
11924 argmode = TYPE_MODE (TREE_TYPE (arg));
11925 if (argmode != opmode)
11926 arg = build1 (NOP_EXPR, optype, arg);
11927 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
11928 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
11929 op[nop] = copy_to_mode_reg (opmode, op[nop]);
11932 rtx pat = NULL_RTX;
11934 switch (nop)
11936 case 1:
11937 pat = (*insn_data[d->icode].genfun) (op[0]);
11938 break;
11939 case 2:
11940 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
11941 break;
11942 case 3:
11943 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
11944 break;
11945 case 4:
11946 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
11947 break;
11948 default:
11949 gcc_unreachable ();
11951 if (! pat)
11952 return NULL_RTX;
11953 emit_insn (pat);
11954 return target;
11957 void
11958 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
11960 rtx sel0 = const0_rtx;
11961 rtx sel1 = const1_rtx;
11962 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
11963 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
11965 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
11966 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
11969 void
11970 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
11972 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
11974 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
11975 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
11978 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
11979 We can allow any mode in any general register. The special registers
11980 only allow SImode. Don't allow any mode in the PR.
11982 We cannot hold DCmode values in the XD registers because alter_reg
11983 handles subregs of them incorrectly. We could work around this by
11984 spacing the XD registers like the DR registers, but this would require
11985 additional memory in every compilation to hold larger register vectors.
11986 We could hold SFmode / SCmode values in XD registers, but that
11987 would require a tertiary reload when reloading from / to memory,
11988 and a secondary reload to reload from / to general regs; that
11989 seems to be a losing proposition.
11991 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
11992 it won't be ferried through GP registers first. */
11993 bool
11994 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11996 if (SPECIAL_REGISTER_P (regno))
11997 return mode == SImode;
11999 if (regno == FPUL_REG)
12000 return (mode == SImode || mode == SFmode);
12002 if (FP_REGISTER_P (regno) && mode == SFmode)
12003 return true;
12005 if (mode == V2SFmode)
12007 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12008 || GENERAL_REGISTER_P (regno)))
12009 return true;
12010 else
12011 return false;
12014 if (mode == V4SFmode)
12016 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12017 || GENERAL_REGISTER_P (regno))
12018 return true;
12019 else
12020 return false;
12023 if (mode == V16SFmode)
12025 if (TARGET_SHMEDIA)
12027 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12028 return true;
12029 else
12030 return false;
12032 else
12033 return regno == FIRST_XD_REG;
12036 if (FP_REGISTER_P (regno))
12038 if (mode == SFmode
12039 || mode == SImode
12040 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12041 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12042 || mode == DCmode
12043 || (TARGET_SHMEDIA
12044 && (mode == DFmode || mode == DImode
12045 || mode == V2SFmode || mode == TImode)))
12046 && ((regno - FIRST_FP_REG) & 1) == 0)
12047 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12048 && ((regno - FIRST_FP_REG) & 3) == 0))
12049 return true;
12050 else
12051 return false;
12054 if (XD_REGISTER_P (regno))
12055 return mode == DFmode;
12057 if (TARGET_REGISTER_P (regno))
12058 return (mode == DImode || mode == SImode || mode == PDImode);
12060 if (regno == PR_REG)
12061 return mode == SImode;
12063 if (regno == FPSCR_REG)
12064 return mode == PSImode;
12066 /* FIXME. This works around PR target/37633 for -O0. */
12067 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12069 unsigned int n = GET_MODE_SIZE (mode) / 8;
12071 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12072 && regno <= FIRST_GENERAL_REG + 14)
12073 return false;
12076 return true;
12079 /* Return the class of registers for which a mode change from FROM to TO
12080 is invalid. */
12081 bool
12082 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12083 enum reg_class rclass)
12085 /* We want to enable the use of SUBREGs as a means to
12086 VEC_SELECT a single element of a vector. */
12088 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12089 This can be problematic when SFmode vector subregs need to be accessed
12090 on the stack with displacement addressing, as it happens with -O0.
12091 Thus we disallow the mode change for -O0. */
12092 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12093 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12095 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12097 if (TARGET_LITTLE_ENDIAN)
12099 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12100 return reg_classes_intersect_p (DF_REGS, rclass);
12102 else
12104 if (GET_MODE_SIZE (from) < 8)
12105 return reg_classes_intersect_p (DF_REGS, rclass);
12108 return false;
12111 /* Return true if registers in machine mode MODE will likely be
12112 allocated to registers in small register classes. */
12113 bool
12114 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12116 return (! TARGET_SHMEDIA);
12119 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12120 that label is used. */
12121 void
12122 sh_mark_label (rtx address, int nuses)
12124 if (GOTOFF_P (address))
12126 /* Extract the label or symbol. */
12127 address = XEXP (address, 0);
12128 if (GET_CODE (address) == PLUS)
12129 address = XEXP (address, 0);
12130 address = XVECEXP (address, 0, 0);
12132 if (GET_CODE (address) == LABEL_REF
12133 && LABEL_P (XEXP (address, 0)))
12134 LABEL_NUSES (XEXP (address, 0)) += nuses;
12137 /* Compute extra cost of moving data between one register class
12138 and another.
12140 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12141 uses this information. Hence, the general register <-> floating point
12142 register information here is not used for SFmode. */
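/* For instance (a worked example derived from the code below): on a
   non-SHmedia target without -mfmovd, moving an SImode pointer (Pmode)
   between a general register and a floating-point register costs
   (12 + 40) * ((4 + 7) / 8) = 52, which strongly discourages the register
   allocator from putting pointers into FP registers.  */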
12143 static int
12144 sh_register_move_cost (enum machine_mode mode,
12145 reg_class_t srcclass, reg_class_t dstclass)
12147 if (dstclass == T_REGS || dstclass == PR_REGS)
12148 return 10;
12150 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12151 return 4;
12153 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12154 && REGCLASS_HAS_FP_REG (srcclass)
12155 && REGCLASS_HAS_FP_REG (dstclass))
12156 return 4;
12158 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12159 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12161 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12162 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12163 return 9;
12165 if ((REGCLASS_HAS_FP_REG (dstclass)
12166 && REGCLASS_HAS_GENERAL_REG (srcclass))
12167 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12168 && REGCLASS_HAS_FP_REG (srcclass)))
12170 /* Discourage trying to use fp regs for a pointer. This also
12171 discourages fp regs with SImode because Pmode is an alias
12172 of SImode on this target. See PR target/48596. */
12173 int addend = (mode == Pmode) ? 40 : 0;
12175 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12176 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12179 if ((dstclass == FPUL_REGS
12180 && REGCLASS_HAS_GENERAL_REG (srcclass))
12181 || (srcclass == FPUL_REGS
12182 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12183 return 5;
12185 if ((dstclass == FPUL_REGS
12186 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12187 || (srcclass == FPUL_REGS
12188 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12189 return 7;
12191 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12192 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12193 return 20;
12195 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12196 if (TARGET_SHMEDIA
12197 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12199 if (sh_gettrcost >= 0)
12200 return sh_gettrcost;
12201 else if (!TARGET_PT_FIXED)
12202 return 100;
12205 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12206 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12207 return 4;
12209 if (TARGET_SHMEDIA
12210 || (TARGET_FMOVD
12211 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12212 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12213 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12215 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
12218 static rtx
12219 emit_load_ptr (rtx reg, rtx addr)
12221 rtx mem = gen_const_mem (ptr_mode, addr);
12223 if (Pmode != ptr_mode)
12224 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12225 return emit_move_insn (reg, mem);
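/* Implement TARGET_ASM_OUTPUT_MI_THUNK: emit a thunk that adjusts the
   incoming `this' pointer by DELTA (and, if VCALL_OFFSET is nonzero, by a
   further amount loaded from the vtable at *this + VCALL_OFFSET) and then
   tail-calls FUNCTION.  Such thunks arise e.g. for C++ multiple
   inheritance.  */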
12228 static void
12229 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12230 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12231 tree function)
12233 CUMULATIVE_ARGS cum;
12234 int structure_value_byref = 0;
12235 rtx this_rtx, this_value, sibcall, insns, funexp;
12236 tree funtype = TREE_TYPE (function);
12237 int simple_add = CONST_OK_FOR_ADD (delta);
12238 int did_load = 0;
12239 rtx scratch0, scratch1, scratch2;
12240 unsigned i;
12242 reload_completed = 1;
12243 epilogue_completed = 1;
12244 crtl->uses_only_leaf_regs = 1;
12246 emit_note (NOTE_INSN_PROLOGUE_END);
12248 /* Find the "this" pointer. We have such a wide range of ABIs for the
12249 SH that it's best to do this completely machine independently.
12250 "this" is passed as first argument, unless a structure return pointer
12251 comes first, in which case "this" comes second. */
12252 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12253 #ifndef PCC_STATIC_STRUCT_RETURN
12254 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12255 structure_value_byref = 1;
12256 #endif /* not PCC_STATIC_STRUCT_RETURN */
12257 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12259 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12261 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12263 this_rtx
12264 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12266 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12267 static chain pointer (even if you can't have nested virtual functions
12268 right now, someone might implement them sometime), and the rest of the
12269 registers are used for argument passing, are callee-saved, or reserved. */
12270 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12271 -ffixed-reg has been used. */
12272 if (! call_used_regs[0] || fixed_regs[0])
12273 error ("r0 needs to be available as a call-clobbered register");
12274 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12275 if (! TARGET_SH5)
12277 if (call_used_regs[1] && ! fixed_regs[1])
12278 scratch1 = gen_rtx_REG (ptr_mode, 1);
12279 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12280 to the location where struct values are to be returned. */
12281 if (call_used_regs[3] && ! fixed_regs[3])
12282 scratch2 = gen_rtx_REG (Pmode, 3);
12284 else if (TARGET_SHMEDIA)
12286 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12287 if (i != REGNO (scratch0) &&
12288 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12290 scratch1 = gen_rtx_REG (ptr_mode, i);
12291 break;
12293 if (scratch1 == scratch0)
12294 error ("need a second call-clobbered general purpose register");
12295 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12296 if (call_used_regs[i] && ! fixed_regs[i])
12298 scratch2 = gen_rtx_REG (Pmode, i);
12299 break;
12301 if (scratch2 == scratch0)
12302 error ("need a call-clobbered target register");
12305 this_value = plus_constant (Pmode, this_rtx, delta);
12306 if (vcall_offset
12307 && (simple_add || scratch0 != scratch1)
12308 && strict_memory_address_p (ptr_mode, this_value))
12310 emit_load_ptr (scratch0, this_value);
12311 did_load = 1;
12314 if (!delta)
12315 ; /* Do nothing. */
12316 else if (simple_add)
12317 emit_move_insn (this_rtx, this_value);
12318 else
12320 emit_move_insn (scratch1, GEN_INT (delta));
12321 emit_insn (gen_add2_insn (this_rtx, scratch1));
12324 if (vcall_offset)
12326 rtx offset_addr;
12328 if (!did_load)
12329 emit_load_ptr (scratch0, this_rtx);
12331 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12332 if (strict_memory_address_p (ptr_mode, offset_addr))
12333 ; /* Do nothing. */
12334 else if (! TARGET_SH5 && scratch0 != scratch1)
12336 /* scratch0 != scratch1, and we have indexed loads. Get better
12337 schedule by loading the offset into r1 and using an indexed
12338 load - then the load of r1 can issue before the load from
12339 (this_rtx + delta) finishes. */
12340 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12341 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12343 else if (CONST_OK_FOR_ADD (vcall_offset))
12345 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12346 offset_addr = scratch0;
12348 else if (scratch0 != scratch1)
12350 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12351 emit_insn (gen_add2_insn (scratch0, scratch1));
12352 offset_addr = scratch0;
12354 else
12355 gcc_unreachable (); /* FIXME */
12356 emit_load_ptr (scratch0, offset_addr);
12358 if (Pmode != ptr_mode)
12359 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12360 emit_insn (gen_add2_insn (this_rtx, scratch0));
12363 /* Generate a tail call to the target function. */
12364 if (! TREE_USED (function))
12366 assemble_external (function);
12367 TREE_USED (function) = 1;
12369 funexp = XEXP (DECL_RTL (function), 0);
12370 /* If the function is overridden, so is the thunk, hence we don't
12371 need GOT addressing even if this is a public symbol. */
12372 #if 0
12373 if (TARGET_SH1 && ! flag_weak)
12374 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12375 else
12376 #endif
12377 if (TARGET_SH2 && flag_pic)
12379 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12380 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12382 else
12384 if (TARGET_SHMEDIA && flag_pic)
12386 funexp = gen_sym2PIC (funexp);
12387 PUT_MODE (funexp, Pmode);
12389 emit_move_insn (scratch2, funexp);
12390 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12391 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12393 sibcall = emit_call_insn (sibcall);
12394 SIBLING_CALL_P (sibcall) = 1;
12395 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12396 emit_barrier ();
12398 /* Run just enough of rest_of_compilation to do scheduling and get
12399 the insns emitted. Note that use_thunk calls
12400 assemble_start_function and assemble_end_function. */
12402 insns = get_insns ();
12404 if (optimize > 0)
12406 if (! cfun->cfg)
12407 init_flow (cfun);
12408 split_all_insns_noflow ();
12411 sh_reorg ();
12412 shorten_branches (insns);
12413 final_start_function (insns, file, 1);
12414 final (insns, file, 1);
12415 final_end_function ();
12417 reload_completed = 0;
12418 epilogue_completed = 0;
12422 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12424 rtx sym;
12426 /* If this is not an ordinary function, the name usually comes from a
12427 string literal or an sprintf buffer. Make sure we use the same
12428 string consistently, so that cse will be able to unify address loads. */
12429 if (kind != FUNCTION_ORDINARY)
12430 name = IDENTIFIER_POINTER (get_identifier (name));
12431 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12432 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12433 if (flag_pic)
12434 switch (kind)
12436 case FUNCTION_ORDINARY:
12437 break;
12438 case SFUNC_GOT:
12440 rtx reg = target ? target : gen_reg_rtx (Pmode);
12442 emit_insn (gen_symGOT2reg (reg, sym));
12443 sym = reg;
12444 break;
12446 case SFUNC_STATIC:
12448 /* ??? To allow cse to work, we use GOTOFF relocations.
12449 We could add combiner patterns to transform this into
12450 straight pc-relative calls with sym2PIC / bsrf when
12451 label load and function call are still 1:1 and in the
12452 same basic block during combine. */
12453 rtx reg = target ? target : gen_reg_rtx (Pmode);
12455 emit_insn (gen_symGOTOFF2reg (reg, sym));
12456 sym = reg;
12457 break;
12460 if (target && sym != target)
12462 emit_move_insn (target, sym);
12463 return target;
12465 return sym;
12468 /* Find the number of a general purpose register in S. */
12469 static int
12470 scavenge_reg (HARD_REG_SET *s)
12472 int r;
12473 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12474 if (TEST_HARD_REG_BIT (*s, r))
12475 return r;
12476 return -1;
12480 sh_get_pr_initial_val (void)
12482 rtx val;
12484 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12485 PR register on SHcompact, because it might be clobbered by the prologue.
12486 We check first if that is known to be the case. */
12487 if (TARGET_SHCOMPACT
12488 && ((crtl->args.info.call_cookie
12489 & ~ CALL_COOKIE_RET_TRAMP (1))
12490 || crtl->saves_all_registers))
12491 return gen_frame_mem (SImode, return_address_pointer_rtx);
12493 /* If we haven't finished rtl generation, there might be a nonlocal label
12494 that we haven't seen yet.
12495 ??? get_hard_reg_initial_val fails if it is called after register
12496 allocation has started, unless it has been called before for the
12497 same register. And even then, we end up in trouble if we didn't use
12498 the register in the same basic block before. So call
12499 get_hard_reg_initial_val now and wrap it in an unspec if we might
12500 need to replace it. */
12501 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12502 combine can put the pseudo returned by get_hard_reg_initial_val into
12503 instructions that need a general purpose register, which will fail to
12504 be recognized when the pseudo becomes allocated to PR. */
12506 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12507 if (TARGET_SH1)
12508 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12509 return val;
12512 bool
12513 sh_expand_t_scc (rtx operands[])
12515 enum rtx_code code = GET_CODE (operands[1]);
12516 rtx target = operands[0];
12517 rtx op0 = operands[2];
12518 rtx op1 = operands[3];
12519 rtx result = target;
12520 HOST_WIDE_INT val;
12522 if (!REG_P (op0) || REGNO (op0) != T_REG
12523 || !CONST_INT_P (op1))
12524 return false;
12525 if (!REG_P (result))
12526 result = gen_reg_rtx (SImode);
12527 val = INTVAL (op1);
12528 if ((code == EQ && val == 1) || (code == NE && val == 0))
12529 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12530 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12531 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12532 else if (code == EQ || code == NE)
12533 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12534 else
12535 return false;
12536 if (result != target)
12537 emit_move_insn (target, result);
12538 return true;
12541 /* INSN is an sfunc; return the rtx that describes the address used. */
12542 static rtx
12543 extract_sfunc_addr (rtx insn)
12545 rtx pattern, part = NULL_RTX;
12546 int len, i;
12548 pattern = PATTERN (insn);
12549 len = XVECLEN (pattern, 0);
12550 for (i = 0; i < len; i++)
12552 part = XVECEXP (pattern, 0, i);
12553 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12554 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12555 return XEXP (part, 0);
12557 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12558 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12561 /* Verify that the register in use_sfunc_addr still agrees with the address
12562 used in the sfunc. This prevents fill_slots_from_thread from changing
12563 use_sfunc_addr.
12564 INSN is the use_sfunc_addr instruction, and REG is the register it
12565 guards. */
12566 bool
12567 check_use_sfunc_addr (rtx insn, rtx reg)
12569 /* Search for the sfunc. It should really come right after INSN. */
12570 while ((insn = NEXT_INSN (insn)))
12572 if (LABEL_P (insn) || JUMP_P (insn))
12573 break;
12574 if (! INSN_P (insn))
12575 continue;
12577 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
12578 insn = XVECEXP (PATTERN (insn), 0, 0);
12579 if (GET_CODE (PATTERN (insn)) != PARALLEL
12580 || get_attr_type (insn) != TYPE_SFUNC)
12581 continue;
12582 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12584 gcc_unreachable ();
12587 /* This function returns a constant rtx that represents 2**15 / pi in
12588 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12589 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
12590 static GTY(()) rtx sh_fsca_sf2int_rtx;
12593 sh_fsca_sf2int (void)
12595 if (! sh_fsca_sf2int_rtx)
12597 REAL_VALUE_TYPE rv;
12599 real_from_string (&rv, "10430.378350470453");
12600 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12603 return sh_fsca_sf2int_rtx;
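/* Worked out: 2**15 / pi = 32768 / 3.14159265358979... = 10430.378350470453,
   the literal used above; the reciprocal scale, pi / 2**15
   = 9.587379924285257e-5, is the literal used by sh_fsca_int2sf below.  */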
12606 /* This function returns a constant rtx that represents pi / 2**15 in
12607 SFmode. It's used to scale SFmode angles, in radians, to a
12608 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
12609 maps to 0x10000. */
12610 static GTY(()) rtx sh_fsca_int2sf_rtx;
12613 sh_fsca_int2sf (void)
12615 if (! sh_fsca_int2sf_rtx)
12617 REAL_VALUE_TYPE rv;
12619 real_from_string (&rv, "9.587379924285257e-5");
12620 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12623 return sh_fsca_int2sf_rtx;
12626 /* Initialize the CUMULATIVE_ARGS structure. */
12627 void
12628 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12629 tree fntype,
12630 rtx libname ATTRIBUTE_UNUSED,
12631 tree fndecl,
12632 signed int n_named_args,
12633 enum machine_mode mode)
12635 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12636 pcum->free_single_fp_reg = 0;
12637 pcum->stack_regs = 0;
12638 pcum->byref_regs = 0;
12639 pcum->byref = 0;
12640 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12642 /* XXX - Should we check TARGET_HITACHI here ??? */
12643 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12645 if (fntype)
12647 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12648 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12649 pcum->prototype_p = prototype_p (fntype);
12650 pcum->arg_count [(int) SH_ARG_INT]
12651 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12653 pcum->call_cookie
12654 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12655 && pcum->arg_count [(int) SH_ARG_INT] == 0
12656 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12657 ? int_size_in_bytes (TREE_TYPE (fntype))
12658 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12659 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12660 == FIRST_RET_REG));
12662 else
12664 pcum->arg_count [(int) SH_ARG_INT] = 0;
12665 pcum->prototype_p = FALSE;
12666 if (mode != VOIDmode)
12668 pcum->call_cookie =
12669 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12670 && GET_MODE_SIZE (mode) > 4
12671 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12673 /* If the default ABI is the Renesas ABI then all library
12674 calls must assume that the library will be using the
12675 Renesas ABI. So if the function would return its result
12676 in memory then we must force the address of this memory
12677 block onto the stack. Ideally we would like to call
12678 targetm.calls.return_in_memory() here but we do not have
12679 the TYPE or the FNDECL available so we synthesize the
12680 contents of that function as best we can. */
12681 pcum->force_mem =
12682 (TARGET_DEFAULT & MASK_HITACHI)
12683 && (mode == BLKmode
12684 || (GET_MODE_SIZE (mode) > 4
12685 && !(mode == DFmode
12686 && TARGET_FPU_DOUBLE)));
12688 else
12690 pcum->call_cookie = 0;
12691 pcum->force_mem = FALSE;
12696 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12697 not descend into CONST_DOUBLEs when doing the replacement.
12699 Note that copying is not done so X must not be shared unless all copies
12700 are to be modified.
12702 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12703 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
12704 replacements[n*2+1] - and that we take mode changes into account.
12706 If a replacement is ambiguous, return NULL_RTX.
12708 If MODIFY is zero, don't modify any rtl in place,
12709 just return zero or nonzero for failure / success. */
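/* For example, with N_REPLACEMENTS == 2 the REPLACEMENTS vector is laid out
   as { FROM(0), TO(0), FROM(1), TO(1) }.  */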
12711 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12713 int i, j;
12714 const char *fmt;
12716 /* The following prevents loops from occurring when we change a MEM in
12717 a CONST_DOUBLE into the same CONST_DOUBLE. */
12718 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12719 return x;
12721 for (i = n_replacements - 1; i >= 0 ; i--)
12722 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12723 return replacements[i*2+1];
12725 /* Allow this function to make replacements in EXPR_LISTs. */
12726 if (x == NULL_RTX)
12727 return NULL_RTX;
12729 if (GET_CODE (x) == SUBREG)
12731 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12732 n_replacements, modify);
12734 if (CONST_INT_P (new_rtx))
12736 x = simplify_subreg (GET_MODE (x), new_rtx,
12737 GET_MODE (SUBREG_REG (x)),
12738 SUBREG_BYTE (x));
12739 if (! x)
12740 abort ();
12742 else if (modify)
12743 SUBREG_REG (x) = new_rtx;
12745 return x;
12747 else if (REG_P (x))
12749 unsigned regno = REGNO (x);
12750 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12751 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12752 rtx result = NULL_RTX;
12754 for (i = n_replacements - 1; i >= 0; i--)
12756 rtx from = replacements[i*2];
12757 rtx to = replacements[i*2+1];
12758 unsigned from_regno, from_nregs, to_regno, new_regno;
12760 if (!REG_P (from))
12761 continue;
12762 from_regno = REGNO (from);
12763 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12764 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12765 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12767 if (regno < from_regno
12768 || regno + nregs > from_regno + from_nregs
12769 || !REG_P (to)
12770 || result)
12771 return NULL_RTX;
12772 to_regno = REGNO (to);
12773 if (to_regno < FIRST_PSEUDO_REGISTER)
12775 new_regno = regno + to_regno - from_regno;
12776 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12777 != nregs)
12778 return NULL_RTX;
12779 result = gen_rtx_REG (GET_MODE (x), new_regno);
12781 else if (GET_MODE (x) <= GET_MODE (to))
12782 result = gen_lowpart_common (GET_MODE (x), to);
12783 else
12784 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12787 return result ? result : x;
12789 else if (GET_CODE (x) == ZERO_EXTEND)
12791 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12792 n_replacements, modify);
12794 if (CONST_INT_P (new_rtx))
12796 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12797 new_rtx, GET_MODE (XEXP (x, 0)));
12798 if (! x)
12799 abort ();
12801 else if (modify)
12802 XEXP (x, 0) = new_rtx;
12804 return x;
12807 fmt = GET_RTX_FORMAT (GET_CODE (x));
12808 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12810 rtx new_rtx;
12812 if (fmt[i] == 'e')
12814 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12815 n_replacements, modify);
12816 if (!new_rtx)
12817 return NULL_RTX;
12818 if (modify)
12819 XEXP (x, i) = new_rtx;
12821 else if (fmt[i] == 'E')
12822 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12824 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12825 n_replacements, modify);
12826 if (!new_rtx)
12827 return NULL_RTX;
12828 if (modify)
12829 XVECEXP (x, i, j) = new_rtx;
12833 return x;
12837 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12839 enum rtx_code code = TRUNCATE;
12841 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12843 rtx inner = XEXP (x, 0);
12844 enum machine_mode inner_mode = GET_MODE (inner);
12846 if (inner_mode == mode)
12847 return inner;
12848 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12849 x = inner;
12850 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12851 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
12853 code = GET_CODE (x);
12854 x = inner;
12857 return gen_rtx_fmt_e (code, mode, x);
12860 /* Called via for_each_rtx after reload, to clean up truncates of
12861 registers that span multiple actual hard registers. */
12863 shmedia_cleanup_truncate (rtx *p, void *n_changes)
12865 rtx x = *p, reg;
12867 if (GET_CODE (x) != TRUNCATE)
12868 return 0;
12869 reg = XEXP (x, 0);
12870 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
12872 enum machine_mode reg_mode = GET_MODE (reg);
12873 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
12874 subreg_lowpart_offset (DImode, reg_mode));
12875 *(int*) n_changes += 1;
12876 return -1;
12878 return 0;
12881 /* Load and store depend on the highpart of the address. However,
12882 set_attr_alternative does not give well-defined results before reload,
12883 so we must look at the rtl ourselves to see if any of the feeding
12884 registers is used in a memref.
12886 Called by sh_contains_memref_p via for_each_rtx. */
12887 static int
12888 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
12890 return (MEM_P (*loc));
12893 /* Return true iff INSN contains a MEM. */
12894 bool
12895 sh_contains_memref_p (rtx insn)
12897 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
12900 /* Return true iff INSN loads a banked register. */
12901 bool
12902 sh_loads_bankedreg_p (rtx insn)
12904 if (GET_CODE (PATTERN (insn)) == SET)
12906 rtx op = SET_DEST (PATTERN (insn));
12907 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
12908 return true;
12911 return false;
12914 /* FNADDR is the MEM expression from a call expander. Return an address
12915 to use in an SHmedia insn pattern. */
12917 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
12919 int is_sym;
12921 fnaddr = XEXP (fnaddr, 0);
12922 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
12923 if (flag_pic && is_sym)
12925 if (! SYMBOL_REF_LOCAL_P (fnaddr))
12927 rtx reg = gen_reg_rtx (Pmode);
12929 /* We must not use GOTPLT for sibcalls, because PIC_REG
12930 must be restored before the PLT code gets to run. */
12931 if (is_sibcall)
12932 emit_insn (gen_symGOT2reg (reg, fnaddr));
12933 else
12934 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
12935 fnaddr = reg;
12937 else
12939 fnaddr = gen_sym2PIC (fnaddr);
12940 PUT_MODE (fnaddr, Pmode);
12943 /* If ptabs might trap, make this visible to the rest of the compiler.
12944 We generally assume that symbols pertain to valid locations, but
12945 it is possible to generate invalid symbols with asm or linker tricks.
12946 In a list of functions where each returns its successor, an invalid
12947 symbol might denote an empty list. */
12948 if (!TARGET_PT_FIXED
12949 && (!is_sym || TARGET_INVALID_SYMBOLS)
12950 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
12952 rtx tr = gen_reg_rtx (PDImode);
12954 emit_insn (gen_ptabs (tr, fnaddr));
12955 fnaddr = tr;
12957 else if (! target_reg_operand (fnaddr, Pmode))
12958 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
12959 return fnaddr;
12962 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
12963 static reg_class_t
12964 sh_preferred_reload_class (rtx x, reg_class_t rclass)
12966 if (rclass == NO_REGS
12967 && TARGET_SHMEDIA
12968 && (CONST_DOUBLE_P (x)
12969 || GET_CODE (x) == SYMBOL_REF
12970 || PIC_ADDR_P (x)))
12971 return GENERAL_REGS;
12973 return rclass;
12976 /* Implement TARGET_SECONDARY_RELOAD. */
12977 static reg_class_t
12978 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
12979 enum machine_mode mode, secondary_reload_info *sri)
12981 enum reg_class rclass = (enum reg_class) rclass_i;
12983 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
12984 && REG_P (XEXP (XEXP (x, 0), 0))
12985 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
12986 return rclass == R0_REGS ? NO_REGS : R0_REGS;
12988 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
12989 return rclass == R0_REGS ? NO_REGS : R0_REGS;
12991 if (REG_P (x) && REGNO (x) == GBR_REG)
12992 return NO_REGS;
12994 if (in_p)
12996 if (REGCLASS_HAS_FP_REG (rclass)
12997 && ! TARGET_SHMEDIA
12998 && immediate_operand ((x), mode)
12999 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13000 && mode == SFmode && fldi_ok ()))
13001 switch (mode)
13003 case SFmode:
13004 sri->icode = CODE_FOR_reload_insf__frn;
13005 return NO_REGS;
13006 case DFmode:
13007 sri->icode = CODE_FOR_reload_indf__frn;
13008 return NO_REGS;
13009 case SImode:
13010 /* ??? If we knew that we are in the appropriate mode -
13011 single precision - we could use a reload pattern directly. */
13012 return FPUL_REGS;
13013 default:
13014 abort ();
13016 if (rclass == FPUL_REGS
13017 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13018 || REGNO (x) == T_REG))
13019 || GET_CODE (x) == PLUS))
13020 return GENERAL_REGS;
13021 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13023 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13024 return GENERAL_REGS;
13025 else if (mode == SFmode)
13026 return FP_REGS;
13027 sri->icode = CODE_FOR_reload_insi__i_fpul;
13028 return NO_REGS;
13030 if (rclass == FPSCR_REGS
13031 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13032 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13033 return GENERAL_REGS;
13034 if (REGCLASS_HAS_FP_REG (rclass)
13035 && TARGET_SHMEDIA
13036 && immediate_operand (x, mode)
13037 && x != CONST0_RTX (GET_MODE (x))
13038 && GET_MODE (x) != V4SFmode)
13039 return GENERAL_REGS;
13040 if ((mode == QImode || mode == HImode)
13041 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13043 sri->icode = ((mode == QImode)
13044 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13045 return NO_REGS;
13046 }
13047 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13048 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13049 return TARGET_REGS;
13050 } /* end of input-only processing. */
13052 if (((REGCLASS_HAS_FP_REG (rclass)
13053 && (REG_P (x)
13054 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13055 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13056 && TARGET_FMOVD))))
13057 || (REGCLASS_HAS_GENERAL_REG (rclass)
13058 && REG_P (x)
13059 && FP_REGISTER_P (REGNO (x))))
13060 && ! TARGET_SHMEDIA
13061 && (mode == SFmode || mode == SImode))
13062 return FPUL_REGS;
13063 if ((rclass == FPUL_REGS
13064 || (REGCLASS_HAS_FP_REG (rclass)
13065 && ! TARGET_SHMEDIA && mode == SImode))
13066 && (MEM_P (x)
13067 || (REG_P (x)
13068 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13069 || REGNO (x) == T_REG
13070 || system_reg_operand (x, VOIDmode)))))
13071 {
13072 if (rclass == FPUL_REGS)
13073 return GENERAL_REGS;
13074 return FPUL_REGS;
13075 }
13076 if ((rclass == TARGET_REGS
13077 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13078 && !satisfies_constraint_Csy (x)
13079 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13080 return GENERAL_REGS;
13081 if ((rclass == MAC_REGS || rclass == PR_REGS)
13082 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13083 && rclass != REGNO_REG_CLASS (REGNO (x)))
13084 return GENERAL_REGS;
13085 if (rclass != GENERAL_REGS && REG_P (x)
13086 && TARGET_REGISTER_P (REGNO (x)))
13087 return GENERAL_REGS;
13089 /* If we get here, fall back to loading the FPUL register through general registers.
13090 This case can happen when movsi_ie insn is picked initially to
13091 load/store the FPUL register from/to another register, and then the
13092 other register is allocated on the stack. */
13093 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13094 return GENERAL_REGS;
13096 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13097 the other operand.
13098 On SH2A could also just leave it alone here, which would result in a
13099 4 byte move insn being generated instead. However, for this to work
13100 the insns must have the appropriate alternatives. */
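/* For example, the displacement forms mov.b @(disp,Rn),R0 and
   mov.b R0,@(disp,Rn) accept only R0 as the data operand, which is why
   R0_REGS is returned below.  */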
13101 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13102 && satisfies_constraint_Sdd (x)
13103 && disp_addr_displacement (x) <= max_mov_insn_displacement (mode, false))
13104 return R0_REGS;
13106 /* When reload is trying to address a QImode or HImode subreg on the stack,
13107 force any subreg byte into R0_REGS, as this is going to become a
13108 displacement address.
13109 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13110 is on the stack, the memref to it might already require a displacement
13111 and that has to be added to the final address. At this point we don't
13112 know the cumulative displacement so we assume the worst case. */
13113 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13114 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13115 return R0_REGS;
13117 return NO_REGS;
13118 }
13120 static void
13121 sh_conditional_register_usage (void)
13122 {
13123 int regno;
13124 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13125 if (! VALID_REGISTER_P (regno))
13126 fixed_regs[regno] = call_used_regs[regno] = 1;
13127 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13128 if (TARGET_SH5)
13129 {
13130 call_used_regs[FIRST_GENERAL_REG + 8]
13131 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13132 call_really_used_regs[FIRST_GENERAL_REG + 8]
13133 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13134 }
13135 if (TARGET_SHMEDIA)
13136 {
13137 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13138 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13139 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13140 }
13141 if (flag_pic)
13142 {
13143 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13144 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13145 }
13146 /* Renesas saves and restores mac registers on call. */
13147 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13148 {
13149 call_really_used_regs[MACH_REG] = 0;
13150 call_really_used_regs[MACL_REG] = 0;
13151 }
13153 if (TARGET_SHMEDIA)
13154 {
13155 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13156 if (! fixed_regs[regno] && call_really_used_regs[regno])
13157 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13158 }
13159 else
13160 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13161 if (! fixed_regs[regno] && call_really_used_regs[regno])
13162 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13163 }
13165 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13167 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13168 static bool
13169 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13170 {
13171 return (TARGET_SHMEDIA
13172 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13173 || x == CONST0_RTX (mode)
13174 || !TARGET_SHMEDIA_FPU
13175 || TARGET_SHMEDIA64)
13176 : (GET_CODE (x) != CONST_DOUBLE
13177 || mode == DFmode || mode == SFmode
13178 || mode == DImode || GET_MODE (x) == VOIDmode));
13179 }
13181 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13183 static void
13184 sh_init_sync_libfuncs (void)
13185 {
13186 init_sync_libfuncs (UNITS_PER_WORD);
13187 }
13189 /* Return true if it is appropriate to emit `ret' instructions in the
13190 body of a function. */
13191 bool
13192 sh_can_use_simple_return_p (void)
13193 {
13194 HARD_REG_SET live_regs_mask;
13195 int d;
13197 /* Some targets require special return insns. */
13198 if (TARGET_SHMEDIA
13199 || (TARGET_SHCOMPACT
13200 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13201 return false;
13203 if (! reload_completed || frame_pointer_needed)
13204 return false;
13206 /* Moving the prologue around doesn't reduce the size. */
13207 if (optimize_function_for_size_p (cfun))
13208 return false;
13210 /* Finally, allow for pr save. */
13211 d = calc_live_regs (&live_regs_mask);
13213 if (rounded_frame_size (d) > 4)
13214 return false;
13216 return true;
13217 }
13219 /*------------------------------------------------------------------------------
13220 Address mode optimization support code
13221 */
13223 typedef HOST_WIDE_INT disp_t;
13224 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13225 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13226 static const disp_t INVALID_DISP = MAX_DISP;
13228 /* A memory reference which is described by a base register and a
13229 displacement. */
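/* Such a reference is either a base register plus displacement (is_reg),
   a bare constant displacement with no base register (is_disp), or invalid
   when disp_ == INVALID_DISP and nothing useful could be derived.  */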
13230 class base_reg_disp
13231 {
13232 public:
13233 base_reg_disp (rtx br, disp_t d);
13235 bool is_reg (void) const;
13236 bool is_disp (void) const;
13237 rtx reg (void) const;
13238 disp_t disp (void) const;
13240 private:
13241 rtx reg_;
13242 disp_t disp_;
13243 };
13245 inline
13246 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13247 : reg_ (br), disp_ (d)
13248 {
13249 }
13251 inline bool
13252 base_reg_disp::is_reg (void) const
13253 {
13254 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13255 }
13257 inline bool
13258 base_reg_disp::is_disp (void) const
13259 {
13260 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13261 }
13263 inline rtx
13264 base_reg_disp::reg (void) const
13265 {
13266 return reg_;
13267 }
13269 inline disp_t
13270 base_reg_disp::disp (void) const
13271 {
13272 return disp_;
13273 }
13275 /* Find the base register and calculate the displacement for a given
13276 address rtx 'x'.
13277 This is done by walking the insn list backwards and following SET insns
13278 that set the value of the specified reg 'x'. */
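/* Illustrative example (assumed pseudo register numbers): for a sequence
      (set (reg 200) (reg GBR))
      (set (reg 201) (plus (reg 200) (const_int 12)))
   a query for the address (plus (reg 201) (const_int 4)) walks back through
   both sets and yields base reg GBR with displacement 16.  */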
13279 static base_reg_disp
13280 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13281 {
13282 if (REG_P (x))
13283 {
13284 if (REGNO (x) == GBR_REG)
13285 return base_reg_disp (x, disp);
13287 /* We've reached a hard-reg. This is probably the point where
13288 function args are copied to pseudos. Do not go any further and
13289 stick to the pseudo. If the original mem addr was in a hard reg
13290 from the beginning, it will become the base reg. */
13291 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13292 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13294 /* Try to find the previous insn that sets the reg. */
13295 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13296 i = prev_nonnote_insn (i))
13297 {
13298 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13299 && CALL_P (i))
13300 break;
13302 if (!NONJUMP_INSN_P (i))
13303 continue;
13305 rtx p = PATTERN (i);
13306 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13307 && REGNO (XEXP (p, 0)) == REGNO (x))
13308 {
13309 /* If the recursion can't find out any more details about the
13310 source of the set, then this reg becomes our new base reg. */
13311 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13312 }
13313 }
13315 /* If we get here, no previous insn was found that sets the reg.
13316 The input reg is already the base reg. */
13317 return base_reg_disp (x, disp);
13318 }
13320 else if (GET_CODE (x) == PLUS)
13321 {
13322 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13323 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13325 /* Either left or right val must be a reg.
13326 We don't handle the case of 'reg + reg' here. */
13327 if (left_val.is_reg () && right_val.is_disp ())
13328 return base_reg_disp (left_val.reg (), left_val.disp ()
13329 + right_val.disp () + disp);
13330 else if (right_val.is_reg () && left_val.is_disp ())
13331 return base_reg_disp (right_val.reg (), right_val.disp ()
13332 + left_val.disp () + disp);
13333 else
13334 return base_reg_disp (base_reg, disp);
13335 }
13337 else if (CONST_INT_P (x))
13338 return base_reg_disp (NULL, disp + INTVAL (x));
13340 /* Didn't find anything useful. */
13341 return base_reg_disp (base_reg, disp);
13342 }
13344 /* Given an insn and a memory operand, try to find an equivalent GBR
13345 based memory address and return the corresponding new memory address.
13346 Return NULL_RTX if not found. */
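/* Illustrative example: if the address of MEM was computed earlier as
   r4 = GBR + 20 and MEM is @(r4 + 4), the returned address is
   (plus (reg GBR) (const_int 24)), provided that 24 is a valid GBR
   displacement for the access mode.  */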
13347 rtx
13348 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13349 {
13350 if (!MEM_P (mem))
13351 return NULL_RTX;
13353 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13354 if (side_effects_p (XEXP (mem, 0)))
13355 return NULL_RTX;
13357 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13359 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13360 {
13361 rtx disp = GEN_INT (gbr_disp.disp ());
13362 if (gbr_displacement (disp, GET_MODE (mem)))
13363 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13364 }
13366 return NULL_RTX;
13367 }
13369 /*------------------------------------------------------------------------------
13370 Manual insn combine support code.
13371 */
13373 /* Given a reg rtx and a start insn, try to find the insn that sets the
13374 specified reg by using the specified insn stepping function, such as
13375 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13376 of the reg set. */
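/* Typical use, as in sh_is_logical_t_store_expr below:
      set_of_reg s = sh_find_set_of_reg (reg, insn, prev_nonnote_insn_bb);
      if (s.set_src != NULL_RTX)
        ... inspect s.set_src and s.insn ...
   set_src stays NULL_RTX when no suitable SET of the reg was found.  */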
13377 set_of_reg
13378 sh_find_set_of_reg (rtx reg, rtx insn, rtx(*stepfunc)(rtx))
13379 {
13380 set_of_reg result;
13381 result.insn = insn;
13382 result.set_rtx = NULL_RTX;
13383 result.set_src = NULL_RTX;
13385 if (!REG_P (reg) || insn == NULL_RTX)
13386 return result;
13388 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13389 result.insn = stepfunc (result.insn))
13390 {
13391 if (BARRIER_P (result.insn))
13392 return result;
13393 if (!NONJUMP_INSN_P (result.insn))
13394 continue;
13395 if (reg_set_p (reg, result.insn))
13396 {
13397 result.set_rtx = set_of (reg, result.insn);
13399 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13400 return result;
13402 result.set_src = XEXP (result.set_rtx, 1);
13403 return result;
13404 }
13405 }
13407 return result;
13408 }
13410 /* Given an op rtx and an insn, try to find out whether the result of the
13411 specified op consists only of logical operations on T bit stores. */
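/* Illustrative example: an op such as (ior (reg T) (reg 60)) qualifies when
   reg 60 (an assumed pseudo) was itself set from the T bit, or from another
   logical expression over T bit stores, earlier in the basic block.  */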
13412 bool
13413 sh_is_logical_t_store_expr (rtx op, rtx insn)
13414 {
13415 if (!logical_operator (op, SImode))
13416 return false;
13418 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13419 int op_is_t_count = 0;
13421 for (int i = 0; i < 2; ++i)
13422 {
13423 if (t_reg_operand (ops[i], VOIDmode)
13424 || negt_reg_operand (ops[i], VOIDmode))
13425 op_is_t_count++;
13427 else
13428 {
13429 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13430 prev_nonnote_insn_bb);
13431 if (op_set.set_src == NULL_RTX)
13432 continue;
13434 if (t_reg_operand (op_set.set_src, VOIDmode)
13435 || negt_reg_operand (op_set.set_src, VOIDmode)
13436 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13437 op_is_t_count++;
13438 }
13439 }
13441 return op_is_t_count == 2;
13442 }
13444 /* Given the operand that is extended in a sign/zero extend insn, and the
13445 insn, try to figure out whether the sign/zero extension can be replaced
13446 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13447 NULL_RTX otherwise. */
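/* Illustrative example: if reg 60 (an assumed pseudo) was set from the T bit,
   which is always 0 or 1, a following zero extension such as
      (set (reg 61) (zero_extend:SI (subreg:QI (reg:SI 60) 0)))
   can be replaced by a plain SImode copy of reg 60.  */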
13448 rtx
13449 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13450 {
13451 if (REG_P (extended_op))
13452 extended_op = extended_op;
13453 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13454 extended_op = SUBREG_REG (extended_op);
13455 else
13456 return NULL_RTX;
13458 /* Reg moves must be of the same mode. */
13459 if (GET_MODE (extended_op) != SImode)
13460 return NULL_RTX;
13462 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13463 if (s.set_src == NULL_RTX)
13464 return NULL_RTX;
13466 if (t_reg_operand (s.set_src, VOIDmode)
13467 || negt_reg_operand (s.set_src, VOIDmode))
13468 return extended_op;
13470 /* If the zero extended reg was formed by a logical operation, check the
13471 operands of the logical operation. If both originated from T bit
13472 stores the zero extension can be eliminated. */
13473 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13474 return extended_op;
13476 return NULL_RTX;
13477 }
13479 #include "gt-sh.h"