gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2014 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
23 #include <vector>
24 #include <algorithm>
26 #include "config.h"
27 #include "system.h"
28 #include "coretypes.h"
29 #include "tm.h"
30 #include "insn-config.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "stringpool.h"
34 #include "stor-layout.h"
35 #include "calls.h"
36 #include "varasm.h"
37 #include "flags.h"
38 #include "expr.h"
39 #include "optabs.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "regs.h"
43 #include "hard-reg-set.h"
44 #include "output.h"
45 #include "insn-attr.h"
46 #include "diagnostic-core.h"
47 #include "recog.h"
48 #include "dwarf2.h"
49 #include "tm_p.h"
50 #include "target.h"
51 #include "target-def.h"
52 #include "langhooks.h"
53 #include "basic-block.h"
54 #include "df.h"
55 #include "intl.h"
56 #include "sched-int.h"
57 #include "params.h"
58 #include "ggc.h"
59 #include "hash-table.h"
60 #include "tree-ssa-alias.h"
61 #include "internal-fn.h"
62 #include "gimple-fold.h"
63 #include "tree-eh.h"
64 #include "gimple-expr.h"
65 #include "is-a.h"
66 #include "gimple.h"
67 #include "gimplify.h"
68 #include "cfgloop.h"
69 #include "alloc-pool.h"
70 #include "tm-constrs.h"
71 #include "opts.h"
72 #include "tree-pass.h"
73 #include "pass_manager.h"
74 #include "context.h"
75 #include "builtins.h"
77 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
79 /* These are some macros to abstract register modes. */
80 #define CONST_OK_FOR_I10(VALUE) (((HOST_WIDE_INT)(VALUE)) >= -512 \
81 && ((HOST_WIDE_INT)(VALUE)) <= 511)
83 #define CONST_OK_FOR_ADD(size) \
84 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
85 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
86 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
87 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
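/* Annotation: CONST_OK_FOR_I08 (defined elsewhere in the SH backend, not in
   this file) is assumed to accept the 8-bit signed range -128..127 of the SH
   "add #imm,Rn" instruction, while CONST_OK_FOR_I10 above covers the wider
   SHmedia range -512..511.  Likewise, the GEN_* macros select the DImode
   generators when pointers are 64 bits wide (SHMEDIA64) and the SImode ones
   otherwise.  */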
89 /* Used to simplify the logic below. Find the attributes wherever
90 they may be. */
91 #define SH_ATTRIBUTES(decl) \
92 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
93 : DECL_ATTRIBUTES (decl) \
94 ? (DECL_ATTRIBUTES (decl)) \
95 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
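/* I.e. for a type node use TYPE_ATTRIBUTES directly; for a decl use its own
   DECL_ATTRIBUTES when that list is non-empty, otherwise fall back to the
   attributes of the decl's type.  */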
97 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
98 int current_function_interrupt;
100 tree sh_deferred_function_attributes;
101 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
103 /* Global variables for machine-dependent things. */
105 /* The CPU we are scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* Number of r0 life regions. */
117 static int r0_life_regions;
119 /* If true, skip cycles for Q -> R movement. */
120 static int skip_cycles = 0;
122 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
123 and returned from sh_reorder2. */
124 static short cached_can_issue_more;
126 /* Unique number for UNSPEC_BBR pattern. */
127 static unsigned int unspec_bbr_uid = 1;
129 /* Provides the class number of the smallest class containing
130 reg number. */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS, GENERAL_REGS,
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 int assembler_dialect;
183 static bool shmedia_space_reserved_for_target_registers;
185 static void split_branches (rtx_insn *);
186 static int branch_dest (rtx);
187 static void print_slot (rtx_sequence *);
188 static rtx_code_label *add_constant (rtx, enum machine_mode, rtx);
189 static void dump_table (rtx_insn *, rtx_insn *);
190 static bool broken_move (rtx_insn *);
191 static bool mova_p (rtx_insn *);
192 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
193 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
194 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
195 static void sh_reorg (void);
196 static void sh_option_override (void);
197 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
198 static rtx_insn *frame_insn (rtx);
199 static rtx push (int);
200 static void pop (int);
201 static void push_regs (HARD_REG_SET *, int);
202 static int calc_live_regs (HARD_REG_SET *);
203 static HOST_WIDE_INT rounded_frame_size (int);
204 static bool sh_frame_pointer_required (void);
205 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
206 static int sh_mode_needed (int, rtx_insn *);
207 static int sh_mode_after (int, int, rtx_insn *);
208 static int sh_mode_entry (int);
209 static int sh_mode_exit (int);
210 static int sh_mode_priority (int entity, int n);
212 static rtx mark_constant_pool_use (rtx);
213 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
214 int, bool *);
215 static tree sh_handle_resbank_handler_attribute (tree *, tree,
216 tree, int, bool *);
217 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
218 tree, int, bool *);
219 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
220 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
221 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
222 static void sh_print_operand (FILE *, rtx, int);
223 static void sh_print_operand_address (FILE *, rtx);
224 static bool sh_print_operand_punct_valid_p (unsigned char code);
225 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
226 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
227 static void sh_insert_attributes (tree, tree *);
228 static const char *sh_check_pch_target_flags (int);
229 static int sh_register_move_cost (enum machine_mode, reg_class_t, reg_class_t);
230 static int sh_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (basic_block, enum machine_mode);
236 static int find_r0_life_regions (basic_block);
237 static void sh_md_init_global (FILE *, int, int);
238 static void sh_md_finish_global (FILE *, int);
239 static int rank_for_reorder (const void *, const void *);
240 static void swap_reorder (rtx_insn **, int);
241 static void ready_reorder (rtx_insn **, int);
242 static bool high_pressure (enum machine_mode);
243 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
244 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
245 static void sh_md_init (FILE *, int, int);
246 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
248 static bool sh_function_ok_for_sibcall (tree, tree);
250 static bool sh_cannot_modify_jumps_p (void);
251 static reg_class_t sh_target_reg_class (void);
252 static bool sh_optimize_target_register_callee_saved (bool);
253 static bool sh_ms_bitfield_layout_p (const_tree);
255 static void sh_init_builtins (void);
256 static tree sh_builtin_decl (unsigned, bool);
257 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
258 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
259 HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static bool flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, const_rtx, void *);
263 static int shiftcosts (rtx);
264 static int and_xor_ior_costs (rtx, int);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx_insn *);
269 static bool sh_rtx_costs (rtx, int, int, int, int *, bool);
270 static int sh_address_cost (rtx, enum machine_mode, addr_space_t, bool);
271 static int sh_pr_n_sets (void);
272 static rtx sh_allocate_initial_value (rtx);
273 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
274 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
275 enum machine_mode,
276 struct secondary_reload_info *);
277 static bool sh_legitimate_address_p (enum machine_mode, rtx, bool);
278 static rtx sh_legitimize_address (rtx, rtx, enum machine_mode);
279 static rtx sh_delegitimize_address (rtx);
280 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
281 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
282 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
283 static int scavenge_reg (HARD_REG_SET *s);
284 struct save_schedule_s;
285 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
286 struct save_schedule_s *, int);
288 static rtx sh_struct_value_rtx (tree, int);
289 static rtx sh_function_value (const_tree, const_tree, bool);
290 static bool sh_function_value_regno_p (const unsigned int);
291 static rtx sh_libcall_value (enum machine_mode, const_rtx);
292 static bool sh_return_in_memory (const_tree, const_tree);
293 static rtx sh_builtin_saveregs (void);
294 static void sh_setup_incoming_varargs (cumulative_args_t, enum machine_mode,
295 tree, int *, int);
296 static bool sh_strict_argument_naming (cumulative_args_t);
297 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
298 static tree sh_build_builtin_va_list (void);
299 static void sh_va_start (tree, rtx);
300 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
301 static bool sh_promote_prototypes (const_tree);
302 static enum machine_mode sh_promote_function_mode (const_tree type,
303 enum machine_mode,
304 int *punsignedp,
305 const_tree funtype,
306 int for_return);
307 static bool sh_pass_by_reference (cumulative_args_t, enum machine_mode,
308 const_tree, bool);
309 static bool sh_callee_copies (cumulative_args_t, enum machine_mode,
310 const_tree, bool);
311 static int sh_arg_partial_bytes (cumulative_args_t, enum machine_mode,
312 tree, bool);
313 static void sh_function_arg_advance (cumulative_args_t, enum machine_mode,
314 const_tree, bool);
315 static rtx sh_function_arg (cumulative_args_t, enum machine_mode,
316 const_tree, bool);
317 static bool sh_scalar_mode_supported_p (enum machine_mode);
318 static int sh_dwarf_calling_convention (const_tree);
319 static void sh_encode_section_info (tree, rtx, int);
320 static bool sh2a_function_vector_p (tree);
321 static void sh_trampoline_init (rtx, tree, rtx);
322 static rtx sh_trampoline_adjust_address (rtx);
323 static void sh_conditional_register_usage (void);
324 static bool sh_legitimate_constant_p (enum machine_mode, rtx);
325 static int mov_insn_size (enum machine_mode, bool);
326 static int mov_insn_alignment_mask (enum machine_mode, bool);
327 static bool sequence_insn_p (rtx_insn *);
328 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
329 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
330 enum machine_mode, bool);
331 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
333 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
335 static const struct attribute_spec sh_attribute_table[] =
337 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
338 affects_type_identity } */
339 { "interrupt_handler", 0, 0, true, false, false,
340 sh_handle_interrupt_handler_attribute, false },
341 { "sp_switch", 1, 1, true, false, false,
342 sh_handle_sp_switch_attribute, false },
343 { "trap_exit", 1, 1, true, false, false,
344 sh_handle_trap_exit_attribute, false },
345 { "renesas", 0, 0, false, true, false,
346 sh_handle_renesas_attribute, false },
347 { "trapa_handler", 0, 0, true, false, false,
348 sh_handle_interrupt_handler_attribute, false },
349 { "nosave_low_regs", 0, 0, true, false, false,
350 sh_handle_interrupt_handler_attribute, false },
351 { "resbank", 0, 0, true, false, false,
352 sh_handle_resbank_handler_attribute, false },
353 { "function_vector", 1, 1, true, false, false,
354 sh2a_handle_function_vector_handler_attribute, false },
355 { NULL, 0, 0, false, false, false, NULL, false }
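/* Illustrative user-level spellings of some of these attributes (the function
   and variable names below are made up for the example):
     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler, trap_exit (12)));
     void isr3 (void) __attribute__ ((interrupt_handler, sp_switch ("alt_stack")));  */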
358 /* Initialize the GCC target structure. */
359 #undef TARGET_ATTRIBUTE_TABLE
360 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
362 /* The next two are used for debug info when compiling with -gdwarf. */
363 #undef TARGET_ASM_UNALIGNED_HI_OP
364 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
365 #undef TARGET_ASM_UNALIGNED_SI_OP
366 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
368 /* These are NULLed out on non-SH5 in TARGET_OPTION_OVERRIDE. */
369 #undef TARGET_ASM_UNALIGNED_DI_OP
370 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
371 #undef TARGET_ASM_ALIGNED_DI_OP
372 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
374 #undef TARGET_OPTION_OVERRIDE
375 #define TARGET_OPTION_OVERRIDE sh_option_override
377 #undef TARGET_PRINT_OPERAND
378 #define TARGET_PRINT_OPERAND sh_print_operand
379 #undef TARGET_PRINT_OPERAND_ADDRESS
380 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
381 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
382 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
383 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
384 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
386 #undef TARGET_ASM_FUNCTION_EPILOGUE
387 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
389 #undef TARGET_ASM_OUTPUT_MI_THUNK
390 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
392 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
393 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
394 hook_bool_const_tree_hwi_hwi_const_tree_true
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START sh_file_start
398 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
399 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
401 #undef TARGET_REGISTER_MOVE_COST
402 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
404 #undef TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
407 #undef TARGET_SCHED_ADJUST_COST
408 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
410 #undef TARGET_SCHED_ISSUE_RATE
411 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
413 /* The next 5 hooks have been implemented to re-enable sched1. With the
414 help of these macros we limit the movement of insns in sched1 in order to
415 reduce register pressure. The overall idea is to keep count of the SImode
416 and SFmode regs required by already scheduled insns. When these counts
417 cross certain threshold values, give priority to insns that free registers.
418 The insn that frees registers is most likely the insn with the lowest
419 LUID (original insn order), but such an insn might sit in the stalled
420 queue (Q) instead of the ready queue (R). To solve this, we skip up to
421 a maximum of 8 cycles so that such insns can move from Q -> R.
423 The descriptions of the hooks are as follows:
425 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
426 scheduler; it is called inside the sched_init function just after the
427 find_insn_reg_weights function call. It is used to calculate the SImode
428 and SFmode weights of the insns of each basic block, much like what
429 find_insn_reg_weights does.
430 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
432 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
433 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
434 (Q)->(R).
436 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
437 high, reorder the ready queue so that the insn with the lowest LUID will be
438 issued next.
440 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
441 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
443 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
444 can be returned from TARGET_SCHED_REORDER2.
446 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
451 #undef TARGET_SCHED_INIT_GLOBAL
452 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
454 #undef TARGET_SCHED_FINISH_GLOBAL
455 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
457 #undef TARGET_SCHED_VARIABLE_ISSUE
458 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
460 #undef TARGET_SCHED_REORDER
461 #define TARGET_SCHED_REORDER sh_reorder
463 #undef TARGET_SCHED_REORDER2
464 #define TARGET_SCHED_REORDER2 sh_reorder2
466 #undef TARGET_SCHED_INIT
467 #define TARGET_SCHED_INIT sh_md_init
469 #undef TARGET_DELEGITIMIZE_ADDRESS
470 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
472 #undef TARGET_LEGITIMIZE_ADDRESS
473 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
475 #undef TARGET_CANNOT_MODIFY_JUMPS_P
476 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
477 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
478 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
479 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
480 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
481 sh_optimize_target_register_callee_saved
483 #undef TARGET_MS_BITFIELD_LAYOUT_P
484 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
486 #undef TARGET_INIT_BUILTINS
487 #define TARGET_INIT_BUILTINS sh_init_builtins
488 #undef TARGET_BUILTIN_DECL
489 #define TARGET_BUILTIN_DECL sh_builtin_decl
490 #undef TARGET_EXPAND_BUILTIN
491 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
493 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
494 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
496 #undef TARGET_CANNOT_COPY_INSN_P
497 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS sh_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST sh_address_cost
502 #undef TARGET_ALLOCATE_INITIAL_VALUE
503 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
505 #undef TARGET_MACHINE_DEPENDENT_REORG
506 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
508 #undef TARGET_DWARF_REGISTER_SPAN
509 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
511 #ifdef HAVE_AS_TLS
512 #undef TARGET_HAVE_TLS
513 #define TARGET_HAVE_TLS true
514 #endif
516 #undef TARGET_PROMOTE_PROTOTYPES
517 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
518 #undef TARGET_PROMOTE_FUNCTION_MODE
519 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
521 #undef TARGET_FUNCTION_VALUE
522 #define TARGET_FUNCTION_VALUE sh_function_value
523 #undef TARGET_FUNCTION_VALUE_REGNO_P
524 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
525 #undef TARGET_LIBCALL_VALUE
526 #define TARGET_LIBCALL_VALUE sh_libcall_value
527 #undef TARGET_STRUCT_VALUE_RTX
528 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
529 #undef TARGET_RETURN_IN_MEMORY
530 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
532 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
533 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
534 #undef TARGET_SETUP_INCOMING_VARARGS
535 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
536 #undef TARGET_STRICT_ARGUMENT_NAMING
537 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
538 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
539 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
540 #undef TARGET_MUST_PASS_IN_STACK
541 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
542 #undef TARGET_PASS_BY_REFERENCE
543 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
544 #undef TARGET_CALLEE_COPIES
545 #define TARGET_CALLEE_COPIES sh_callee_copies
546 #undef TARGET_ARG_PARTIAL_BYTES
547 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
548 #undef TARGET_FUNCTION_ARG
549 #define TARGET_FUNCTION_ARG sh_function_arg
550 #undef TARGET_FUNCTION_ARG_ADVANCE
551 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
553 #undef TARGET_BUILD_BUILTIN_VA_LIST
554 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
555 #undef TARGET_EXPAND_BUILTIN_VA_START
556 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
557 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
558 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
560 #undef TARGET_SCALAR_MODE_SUPPORTED_P
561 #define TARGET_SCALAR_MODE_SUPPORTED_P sh_scalar_mode_supported_p
562 #undef TARGET_VECTOR_MODE_SUPPORTED_P
563 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
565 #undef TARGET_CHECK_PCH_TARGET_FLAGS
566 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
568 #undef TARGET_DWARF_CALLING_CONVENTION
569 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
571 #undef TARGET_FRAME_POINTER_REQUIRED
572 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
574 #undef TARGET_MODE_EMIT
575 #define TARGET_MODE_EMIT sh_emit_mode_set
577 #undef TARGET_MODE_NEEDED
578 #define TARGET_MODE_NEEDED sh_mode_needed
580 #undef TARGET_MODE_AFTER
581 #define TARGET_MODE_AFTER sh_mode_after
583 #undef TARGET_MODE_ENTRY
584 #define TARGET_MODE_ENTRY sh_mode_entry
586 #undef TARGET_MODE_EXIT
587 #define TARGET_MODE_EXIT sh_mode_exit
589 #undef TARGET_MODE_PRIORITY
590 #define TARGET_MODE_PRIORITY sh_mode_priority
592 /* Return regmode weight for insn. */
593 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
594 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
596 /* Return current register pressure for regmode. */
597 #define CURR_REGMODE_PRESSURE(MODE)\
598 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
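/* In both of the arrays indexed here, slot 0 tracks SImode and slot 1 tracks
   SFmode, matching the regmode_weight / curr_regmode_pressure declarations
   above.  */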
600 #undef TARGET_ENCODE_SECTION_INFO
601 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
603 #undef TARGET_SECONDARY_RELOAD
604 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
606 #undef TARGET_PREFERRED_RELOAD_CLASS
607 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
609 #undef TARGET_CONDITIONAL_REGISTER_USAGE
610 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
612 #undef TARGET_LEGITIMATE_ADDRESS_P
613 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
615 #undef TARGET_TRAMPOLINE_INIT
616 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
617 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
618 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
620 #undef TARGET_LEGITIMATE_CONSTANT_P
621 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
623 #undef TARGET_CANONICALIZE_COMPARISON
624 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
626 #undef TARGET_FIXED_CONDITION_CODE_REGS
627 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
629 /* Machine-specific symbol_ref flags. */
630 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
632 /* The tas.b instruction sets bit 7 in the byte, i.e. 0x80. This value
633 is used by optabs.c atomic op expansion code as well as in sync.md. */
634 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
635 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
637 struct gcc_target targetm = TARGET_INITIALIZER;
640 /* Information on the currently selected atomic model.
641 This is initialized in sh_option_override. */
642 static sh_atomic_model selected_atomic_model_;
644 const sh_atomic_model&
645 selected_atomic_model (void)
647 return selected_atomic_model_;
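/* Illustrative caller-side use (elsewhere in the backend this test is
   typically wrapped in convenience macros; the exact macro names are not
   shown in this file):
     if (selected_atomic_model ().type == sh_atomic_model::soft_gusa)
       ... emit a gUSA-style atomic sequence ...  */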
650 static sh_atomic_model
651 parse_validate_atomic_model_option (const char* str)
653 const char* model_names[sh_atomic_model::num_models];
654 model_names[sh_atomic_model::none] = "none";
655 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
656 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
657 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
658 model_names[sh_atomic_model::soft_imask] = "soft-imask";
660 const char* model_cdef_names[sh_atomic_model::num_models];
661 model_cdef_names[sh_atomic_model::none] = "NONE";
662 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
663 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
664 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
665 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
667 sh_atomic_model ret;
668 ret.type = sh_atomic_model::none;
669 ret.name = model_names[sh_atomic_model::none];
670 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
671 ret.strict = false;
672 ret.tcb_gbr_offset = -1;
674 /* Handle empty string as 'none'. */
675 if (str == NULL || *str == '\0')
676 return ret;
678 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
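/* Example strings accepted by the parsing below (the model name must be the
   first token; "strict" and "gbr-offset=N" may follow in any order), assuming
   the usual -matomic-model= command line option:
     "soft-gusa"
     "soft-tcb,gbr-offset=16"
     "hard-llcs,strict"  */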
680 std::vector<std::string> tokens;
681 for (std::stringstream ss (str); ss.good (); )
683 tokens.push_back (std::string ());
684 std::getline (ss, tokens.back (), ',');
687 if (tokens.empty ())
688 err_ret ("invalid atomic model option");
690 /* The first token must be the atomic model name. */
692 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
693 if (tokens.front () == model_names[i])
695 ret.type = (sh_atomic_model::enum_type)i;
696 ret.name = model_names[i];
697 ret.cdef_name = model_cdef_names[i];
698 goto got_mode_name;
701 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
702 got_mode_name:;
705 /* Go through the remaining tokens. */
706 for (size_t i = 1; i < tokens.size (); ++i)
708 if (tokens[i] == "strict")
709 ret.strict = true;
710 else if (tokens[i].find ("gbr-offset=") == 0)
712 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
713 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
714 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
715 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
716 "option", offset_str.c_str ());
718 else
719 err_ret ("unknown parameter \"%s\" in atomic model option",
720 tokens[i].c_str ());
723 /* Check that the selection makes sense. */
724 if (TARGET_SHMEDIA && ret.type != sh_atomic_model::none)
725 err_ret ("atomic operations are not supported on SHmedia");
727 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
728 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
729 ret.name);
731 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
732 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
734 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
735 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
737 if (ret.type == sh_atomic_model::soft_tcb
738 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
739 || (ret.tcb_gbr_offset & 3) != 0))
740 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
741 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
742 ret.name);
744 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
745 err_ret ("cannot use atomic model %s in user mode", ret.name);
747 return ret;
749 #undef err_ret
752 /* Register SH specific RTL passes. */
753 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
754 const char* name);
755 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
756 const char* name);
757 static void
758 register_sh_passes (void)
760 if (!TARGET_SH1)
761 return;
763 /* Running the sh_treg_combine pass after ce1 generates better code when
764 comparisons are combined and reg-reg moves are introduced, because
765 reg-reg moves will be eliminated afterwards. However, there are quite
766 a few cases where combine will be unable to fold comparison related insns,
767 thus for now don't do it.
768 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
769 PASS_POS_INSERT_AFTER, "ce1", 1);
770 */
772 /* Run sh_treg_combine pass after combine but before register allocation. */
773 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
774 PASS_POS_INSERT_AFTER, "split1", 1);
776 /* Run sh_treg_combine pass after register allocation and basic block
777 reordering as this sometimes creates new opportunities. */
778 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
779 PASS_POS_INSERT_AFTER, "split4", 1);
781 /* Optimize sett and clrt insns, e.g. by removing them if the T bit value
782 is known after a conditional branch.
783 This must be done after basic blocks and branch conditions have
784 stabilized and won't be changed by further passes. */
785 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
786 PASS_POS_INSERT_BEFORE, "sched2", 1);
789 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
790 various options, and do some machine dependent initialization. */
791 static void
792 sh_option_override (void)
794 int regno;
796 SUBTARGET_OVERRIDE_OPTIONS;
797 if (optimize > 1 && !optimize_size)
798 target_flags |= MASK_SAVE_ALL_TARGET_REGS;
800 /* Set default values of TARGET_CBRANCHDI4 and TARGET_CMPEQDI_T. */
801 TARGET_CBRANCHDI4 = 1;
802 TARGET_CMPEQDI_T = 0;
804 sh_cpu = PROCESSOR_SH1;
805 assembler_dialect = 0;
806 if (TARGET_SH2)
807 sh_cpu = PROCESSOR_SH2;
808 if (TARGET_SH2E)
809 sh_cpu = PROCESSOR_SH2E;
810 if (TARGET_SH2A)
811 sh_cpu = PROCESSOR_SH2A;
812 if (TARGET_SH3)
813 sh_cpu = PROCESSOR_SH3;
814 if (TARGET_SH3E)
815 sh_cpu = PROCESSOR_SH3E;
816 if (TARGET_SH4)
818 assembler_dialect = 1;
819 sh_cpu = PROCESSOR_SH4;
821 if (TARGET_SH4A_ARCH)
823 assembler_dialect = 1;
824 sh_cpu = PROCESSOR_SH4A;
826 if (TARGET_SH5)
828 sh_cpu = PROCESSOR_SH5;
829 target_flags |= MASK_ALIGN_DOUBLE;
830 if (TARGET_SHMEDIA_FPU)
831 target_flags |= MASK_FMOVD;
832 if (TARGET_SHMEDIA)
834 /* There are no delay slots on SHmedia. */
835 flag_delayed_branch = 0;
836 /* Relaxation isn't yet supported for SHmedia */
837 target_flags &= ~MASK_RELAX;
838 /* After reload, if conversion does little good but can cause
839 ICEs:
840 - find_if_block doesn't do anything for SH because we don't
841 have conditional execution patterns. (We use conditional
842 move patterns, which are handled differently, and only
843 before reload).
844 - find_cond_trap doesn't do anything for the SH because we
845 don't have conditional traps.
846 - find_if_case_1 uses redirect_edge_and_branch_force in
847 the only path that does an optimization, and this causes
848 an ICE when branch targets are in registers.
849 - find_if_case_2 doesn't do anything for the SHmedia after
850 reload except when it can redirect a tablejump - and
851 that's rather rare. */
852 flag_if_conversion2 = 0;
853 if (! strcmp (sh_div_str, "call"))
854 sh_div_strategy = SH_DIV_CALL;
855 else if (! strcmp (sh_div_str, "call2"))
856 sh_div_strategy = SH_DIV_CALL2;
857 if (! strcmp (sh_div_str, "fp") && TARGET_FPU_ANY)
858 sh_div_strategy = SH_DIV_FP;
859 else if (! strcmp (sh_div_str, "inv"))
860 sh_div_strategy = SH_DIV_INV;
861 else if (! strcmp (sh_div_str, "inv:minlat"))
862 sh_div_strategy = SH_DIV_INV_MINLAT;
863 else if (! strcmp (sh_div_str, "inv20u"))
864 sh_div_strategy = SH_DIV_INV20U;
865 else if (! strcmp (sh_div_str, "inv20l"))
866 sh_div_strategy = SH_DIV_INV20L;
867 else if (! strcmp (sh_div_str, "inv:call2"))
868 sh_div_strategy = SH_DIV_INV_CALL2;
869 else if (! strcmp (sh_div_str, "inv:call"))
870 sh_div_strategy = SH_DIV_INV_CALL;
871 else if (! strcmp (sh_div_str, "inv:fp"))
873 if (TARGET_FPU_ANY)
874 sh_div_strategy = SH_DIV_INV_FP;
875 else
876 sh_div_strategy = SH_DIV_INV;
878 TARGET_CBRANCHDI4 = 0;
879 /* Assembler CFI isn't yet fully supported for SHmedia. */
880 flag_dwarf2_cfi_asm = 0;
883 else
885 /* Only the sh64-elf assembler fully supports .quad properly. */
886 targetm.asm_out.aligned_op.di = NULL;
887 targetm.asm_out.unaligned_op.di = NULL;
890 /* User/privileged mode is supported only on SH3*, SH4* and SH5*.
891 Disable it for everything else. */
892 if (! (TARGET_SH3 || TARGET_SH5) && TARGET_USERMODE)
893 TARGET_USERMODE = false;
895 if (TARGET_SH1)
897 if (! strcmp (sh_div_str, "call-div1"))
898 sh_div_strategy = SH_DIV_CALL_DIV1;
899 else if (! strcmp (sh_div_str, "call-fp")
900 && (TARGET_FPU_DOUBLE || TARGET_FPU_SINGLE_ONLY
901 || (TARGET_SHCOMPACT && TARGET_FPU_ANY)))
902 sh_div_strategy = SH_DIV_CALL_FP;
903 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
904 sh_div_strategy = SH_DIV_CALL_TABLE;
905 else
906 /* Pick one that makes most sense for the target in general.
907 It is not much good to use different functions depending
908 on -Os, since then we'll end up with two different functions
909 when some of the code is compiled for size, and some for
910 speed. */
912 /* SH4 tends to emphasize speed. */
913 if (TARGET_HARD_SH4)
914 sh_div_strategy = SH_DIV_CALL_TABLE;
915 /* These have their own way of doing things. */
916 else if (TARGET_SH2A)
917 sh_div_strategy = SH_DIV_INTRINSIC;
918 /* ??? Should we use the integer SHmedia function instead? */
919 else if (TARGET_SHCOMPACT && TARGET_FPU_ANY)
920 sh_div_strategy = SH_DIV_CALL_FP;
921 /* SH1 .. SH3 cores often go into small-footprint systems, so
922 default to the smallest implementation available. */
923 else
924 sh_div_strategy = SH_DIV_CALL_DIV1;
926 if (!TARGET_SH1)
927 TARGET_PRETEND_CMOVE = 0;
928 if (sh_divsi3_libfunc[0])
929 ; /* User supplied - leave it alone. */
930 else if (TARGET_DIVIDE_CALL_FP)
931 sh_divsi3_libfunc = "__sdivsi3_i4";
932 else if (TARGET_DIVIDE_CALL_TABLE)
933 sh_divsi3_libfunc = "__sdivsi3_i4i";
934 else if (TARGET_SH5)
935 sh_divsi3_libfunc = "__sdivsi3_1";
936 else
937 sh_divsi3_libfunc = "__sdivsi3";
939 if (sh_branch_cost == -1)
941 /* The SH1 does not have delay slots, hence we get a pipeline stall
942 at every branch. The SH4 is superscalar, so the single delay slot
943 is not sufficient to keep both pipelines filled.
944 In any case, set the default branch cost to '2', as it results in
945 slightly overall smaller code and also enables some if conversions
946 that are required for matching special T bit related insns. */
947 sh_branch_cost = 2;
950 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
951 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
952 TARGET_ZDCBRANCH = 1;
954 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
955 if (! VALID_REGISTER_P (regno))
956 sh_register_names[regno][0] = '\0';
958 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
959 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
960 sh_additional_register_names[regno][0] = '\0';
962 if ((flag_pic && ! TARGET_PREFERGOT)
963 || (TARGET_SHMEDIA && !TARGET_PT_FIXED))
964 flag_no_function_cse = 1;
966 if (targetm.small_register_classes_for_mode_p (VOIDmode))
968 /* Never run scheduling before reload, since that can
969 break global alloc, and generates slower code anyway due
970 to the pressure on R0. */
971 /* Enable sched1 for SH4 if the user explicitly requests it.
972 When sched1 is enabled, the ready queue will be reordered by
973 the target hooks if pressure is high. We cannot do this for
974 PIC, SH3 and lower, as they give spill failures for R0. */
975 if (!TARGET_HARD_SH4 || flag_pic)
976 flag_schedule_insns = 0;
977 /* ??? Current exception handling places basic block boundaries
978 after call_insns. This causes high pressure on R0 and gives
979 spill failures for R0 in reload. See PR 22553 and the thread
980 on gcc-patches
981 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
982 else if (flag_exceptions)
984 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
985 warning (0, "ignoring -fschedule-insns because of exception "
986 "handling bug");
987 flag_schedule_insns = 0;
989 else if (flag_schedule_insns
990 && !global_options_set.x_flag_schedule_insns)
991 flag_schedule_insns = 0;
994 /* Unwind info is not correct around the CFG unless either a frame
995 pointer is present or M_A_O_A is set. Fixing this requires rewriting
996 unwind info generation to be aware of the CFG and propagating states
997 around edges. */
998 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
999 || flag_exceptions || flag_non_call_exceptions)
1000 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
1002 warning (0, "unwind tables currently require either a frame pointer "
1003 "or -maccumulate-outgoing-args for correctness");
1004 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
1007 /* Unwinding with -freorder-blocks-and-partition does not work on this
1008 architecture, because it requires far jumps to labels crossing between
1009 hot/cold sections, which are rejected on this architecture. */
1010 if (flag_reorder_blocks_and_partition)
1012 if (flag_exceptions)
1014 inform (input_location,
1015 "-freorder-blocks-and-partition does not work with "
1016 "exceptions on this architecture");
1017 flag_reorder_blocks_and_partition = 0;
1018 flag_reorder_blocks = 1;
1020 else if (flag_unwind_tables)
1022 inform (input_location,
1023 "-freorder-blocks-and-partition does not support unwind "
1024 "info on this architecture");
1025 flag_reorder_blocks_and_partition = 0;
1026 flag_reorder_blocks = 1;
1030 /* Adjust loop, jump and function alignment values (in bytes), if those
1031 were not specified by the user using -falign-loops, -falign-jumps
1032 and -falign-functions options.
1033 32 bit alignment is better for speed, because instructions can be
1034 fetched as a pair from a longword boundary. For size use 16 bit
1035 alignment to get more compact code.
1036 Aligning all jumps increases the code size, even if it might
1037 result in slightly faster code. Thus, it is set to the smallest
1038 alignment possible if not specified by the user. */
1039 if (align_loops == 0)
1041 if (TARGET_SH5)
1042 align_loops = 8;
1043 else
1044 align_loops = optimize_size ? 2 : 4;
1047 if (align_jumps == 0)
1049 if (TARGET_SHMEDIA)
1050 align_jumps = 1 << CACHE_LOG;
1051 else
1052 align_jumps = 2;
1054 else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))
1055 align_jumps = TARGET_SHMEDIA ? 4 : 2;
1057 if (align_functions == 0)
1059 if (TARGET_SHMEDIA)
1060 align_functions = optimize_size
1061 ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);
1062 else
1063 align_functions = optimize_size ? 2 : 4;
1066 /* The linker relaxation code breaks when a function contains
1067 alignments that are larger than that at the start of a
1068 compilation unit. */
1069 if (TARGET_RELAX)
1071 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1073 /* Also take possible .long constants / mova tables into account. */
1074 if (min_align < 4)
1075 min_align = 4;
1076 if (align_functions < min_align)
1077 align_functions = min_align;
1080 if (flag_unsafe_math_optimizations)
1082 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
1083 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
1084 TARGET_FSCA = 1;
1086 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
1087 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
1088 TARGET_FSRRA = 1;
1091 /* Allow fsrra insn only if -funsafe-math-optimizations and
1092 -ffinite-math-only are enabled. */
1093 TARGET_FSRRA = TARGET_FSRRA
1094 && flag_unsafe_math_optimizations
1095 && flag_finite_math_only;
1097 /* If the -mieee option was not explicitly set by the user, turn it on
1098 unless -ffinite-math-only was specified. See also PR 33135. */
1099 if (! global_options_set.x_TARGET_IEEE)
1100 TARGET_IEEE = ! flag_finite_math_only;
1102 if (sh_fixed_range_str)
1103 sh_fix_range (sh_fixed_range_str);
1105 /* This target defaults to strict volatile bitfields. */
1106 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
1107 flag_strict_volatile_bitfields = 1;
1109 /* Parse atomic model option and make sure it is valid for the current
1110 target CPU. */
1111 selected_atomic_model_
1112 = parse_validate_atomic_model_option (sh_atomic_model_str);
1114 register_sh_passes ();
1117 /* Print the operand address in x to the stream. */
1118 static void
1119 sh_print_operand_address (FILE *stream, rtx x)
1121 switch (GET_CODE (x))
1123 case REG:
1124 case SUBREG:
1125 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1126 break;
1128 case PLUS:
1130 rtx base = XEXP (x, 0);
1131 rtx index = XEXP (x, 1);
1133 switch (GET_CODE (index))
1135 case CONST_INT:
1136 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1137 reg_names[true_regnum (base)]);
1138 break;
1140 case REG:
1141 case SUBREG:
1143 int base_num = true_regnum (base);
1144 int index_num = true_regnum (index);
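/* In the @(r0,Rn) indexed addressing form one of the two registers must be
   r0, which is hard register 0, so MAX yields the other register.  */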
1146 fprintf (stream, "@(r0,%s)",
1147 reg_names[MAX (base_num, index_num)]);
1148 break;
1151 default:
1152 gcc_unreachable ();
1155 break;
1157 case PRE_DEC:
1158 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1159 break;
1161 case POST_INC:
1162 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1163 break;
1165 default:
1166 x = mark_constant_pool_use (x);
1167 output_addr_const (stream, x);
1168 break;
1172 /* Print operand x (an rtx) in assembler syntax to file stream
1173 according to modifier code.
1175 '.' print a .s if insn needs delay slot
1176 ',' print LOCAL_LABEL_PREFIX
1177 '@' print trap, rte or rts depending upon pragma interruptness
1178 '#' output a nop if there is nothing to put in the delay slot
1179 ''' print likelihood suffix (/u for unlikely).
1180 '>' print branch target if -fverbose-asm
1181 'O' print a constant without the #
1182 'R' print the LSW of a dp value - changes if in little endian
1183 'S' print the MSW of a dp value - changes if in little endian
1184 'T' print the next word of a dp value - same as 'R' in big endian mode.
1185 'M' SHMEDIA: print an `x' if `m' will print `base,index'.
1186 otherwise: print .b / .w / .l / .s / .d suffix if operand is a MEM.
1187 'N' print 'r63' if the operand is (const_int 0).
1188 'd' print a V2SF reg as dN instead of fpN.
1189 'm' print a pair `base,offset' or `base,index', for LD and ST.
1190 'U' Likewise for {LD,ST}{HI,LO}.
1191 'V' print the position of a single bit set.
1192 'W' print the position of a single bit cleared.
1193 't' print a memory address which is a register.
1194 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1195 'o' output an operator. */
1196 static void
1197 sh_print_operand (FILE *stream, rtx x, int code)
1199 int regno;
1200 enum machine_mode mode;
1202 switch (code)
1204 tree trapa_attr;
1206 case '.':
1207 if (final_sequence
1208 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1209 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1210 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1211 break;
1212 case ',':
1213 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1214 break;
1215 case '@':
1216 trapa_attr = lookup_attribute ("trap_exit",
1217 DECL_ATTRIBUTES (current_function_decl));
1218 if (trapa_attr)
1219 fprintf (stream, "trapa #%ld",
1220 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1221 else if (sh_cfun_interrupt_handler_p ())
1223 if (sh_cfun_resbank_handler_p ())
1224 fprintf (stream, "resbank\n");
1225 fprintf (stream, "rte");
1227 else
1228 fprintf (stream, "rts");
1229 break;
1230 case '#':
1231 /* Output a nop if there's nothing in the delay slot. */
1232 if (dbr_sequence_length () == 0)
1233 fprintf (stream, "\n\tnop");
1234 break;
1235 case '\'':
1237 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
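/* Print the "/u" (unlikely) hint when the recorded branch probability is
   below half of REG_BR_PROB_BASE, i.e. the branch is predicted not taken.  */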
1239 if (note && XINT (note, 0) * 2 < REG_BR_PROB_BASE)
1240 fputs ("/u", stream);
1241 break;
1243 case '>':
1244 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1246 fputs ("\t! target: ", stream);
1247 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1249 break;
1250 case 'O':
1251 x = mark_constant_pool_use (x);
1252 output_addr_const (stream, x);
1253 break;
1254 /* N.B.: %R / %S / %T adjust memory addresses by four.
1255 For SHMEDIA, that means they can be used to access the first and
1256 second 32 bit part of a 64 bit (or larger) value that
1257 might be held in floating point registers or memory.
1258 While they can be used to access 64 bit parts of a larger value
1259 held in general purpose registers, that won't work with memory -
1260 neither for fp registers, since the frxx names are used. */
1261 case 'R':
1262 if (REG_P (x) || GET_CODE (x) == SUBREG)
1264 regno = true_regnum (x);
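/* Pick the LSW register of the pair: FP register pairs are always big endian
   (as noted elsewhere in this file), so their LSW is the second register;
   for general registers SH_REG_LSW_OFFSET follows the target endianness
   (assumed to be 0 on little-endian targets).  */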
1265 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1266 fputs (reg_names[regno], (stream));
1268 else if (MEM_P (x))
1270 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1271 sh_print_operand_address (stream, XEXP (x, 0));
1273 else
1275 rtx sub = NULL_RTX;
1277 mode = GET_MODE (x);
1278 if (mode == VOIDmode)
1279 mode = DImode;
1280 if (GET_MODE_SIZE (mode) >= 8)
1281 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1282 if (sub)
1283 sh_print_operand (stream, sub, 0);
1284 else
1285 output_operand_lossage ("invalid operand to %%R");
1287 break;
1288 case 'S':
1289 if (REG_P (x) || GET_CODE (x) == SUBREG)
1291 regno = true_regnum (x);
1292 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1293 fputs (reg_names[regno], (stream));
1295 else if (MEM_P (x))
1297 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1298 sh_print_operand_address (stream, XEXP (x, 0));
1300 else
1302 rtx sub = NULL_RTX;
1304 mode = GET_MODE (x);
1305 if (mode == VOIDmode)
1306 mode = DImode;
1307 if (GET_MODE_SIZE (mode) >= 8)
1308 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1309 if (sub)
1310 sh_print_operand (stream, sub, 0);
1311 else
1312 output_operand_lossage ("invalid operand to %%S");
1314 break;
1315 case 'T':
1316 /* Next word of a double. */
1317 switch (GET_CODE (x))
1319 case REG:
1320 fputs (reg_names[REGNO (x) + 1], (stream));
1321 break;
1322 case MEM:
1323 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1324 && GET_CODE (XEXP (x, 0)) != POST_INC)
1325 x = adjust_address (x, SImode, 4);
1326 sh_print_operand_address (stream, XEXP (x, 0));
1327 break;
1328 default:
1329 break;
1331 break;
1333 case 't':
1334 gcc_assert (MEM_P (x));
1335 x = XEXP (x, 0);
1336 switch (GET_CODE (x))
1338 case REG:
1339 case SUBREG:
1340 sh_print_operand (stream, x, 0);
1341 break;
1342 default:
1343 break;
1345 break;
1347 case 'o':
1348 switch (GET_CODE (x))
1350 case PLUS: fputs ("add", stream); break;
1351 case MINUS: fputs ("sub", stream); break;
1352 case MULT: fputs ("mul", stream); break;
1353 case DIV: fputs ("div", stream); break;
1354 case EQ: fputs ("eq", stream); break;
1355 case NE: fputs ("ne", stream); break;
1356 case GT: case LT: fputs ("gt", stream); break;
1357 case GE: case LE: fputs ("ge", stream); break;
1358 case GTU: case LTU: fputs ("gtu", stream); break;
1359 case GEU: case LEU: fputs ("geu", stream); break;
1360 default:
1361 break;
1363 break;
1364 case 'M':
1365 if (TARGET_SHMEDIA)
1367 if (MEM_P (x)
1368 && GET_CODE (XEXP (x, 0)) == PLUS
1369 && (REG_P (XEXP (XEXP (x, 0), 1))
1370 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
1371 fputc ('x', stream);
1373 else
1375 if (MEM_P (x))
1377 switch (GET_MODE (x))
1379 case QImode: fputs (".b", stream); break;
1380 case HImode: fputs (".w", stream); break;
1381 case SImode: fputs (".l", stream); break;
1382 case SFmode: fputs (".s", stream); break;
1383 case DFmode: fputs (".d", stream); break;
1384 default: gcc_unreachable ();
1388 break;
1390 case 'm':
1391 gcc_assert (MEM_P (x));
1392 x = XEXP (x, 0);
1393 /* Fall through. */
1394 case 'U':
1395 switch (GET_CODE (x))
1397 case REG:
1398 case SUBREG:
1399 sh_print_operand (stream, x, 0);
1400 fputs (", 0", stream);
1401 break;
1403 case PLUS:
1404 sh_print_operand (stream, XEXP (x, 0), 0);
1405 fputs (", ", stream);
1406 sh_print_operand (stream, XEXP (x, 1), 0);
1407 break;
1409 default:
1410 gcc_unreachable ();
1412 break;
1414 case 'V':
1416 int num = exact_log2 (INTVAL (x));
1417 gcc_assert (num >= 0);
1418 fprintf (stream, "#%d", num);
1420 break;
1422 case 'W':
1424 int num = exact_log2 (~INTVAL (x));
1425 gcc_assert (num >= 0);
1426 fprintf (stream, "#%d", num);
1428 break;
1430 case 'd':
1431 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1433 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1434 break;
1436 case 'N':
1437 if (x == CONST0_RTX (GET_MODE (x)))
1439 fprintf ((stream), "r63");
1440 break;
1442 goto default_output;
1443 case 'u':
1444 if (CONST_INT_P (x))
1446 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1447 break;
1449 /* Fall through. */
1451 default_output:
1452 default:
1453 regno = 0;
1454 mode = GET_MODE (x);
1456 switch (GET_CODE (x))
1458 case TRUNCATE:
1460 rtx inner = XEXP (x, 0);
1461 int offset = 0;
1462 enum machine_mode inner_mode;
1464 /* We might see SUBREGs with vector mode registers inside. */
1465 if (GET_CODE (inner) == SUBREG
1466 && (GET_MODE_SIZE (GET_MODE (inner))
1467 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1468 && subreg_lowpart_p (inner))
1469 inner = SUBREG_REG (inner);
1470 if (CONST_INT_P (inner))
1472 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1473 goto default_output;
1475 inner_mode = GET_MODE (inner);
1476 if (GET_CODE (inner) == SUBREG
1477 && (GET_MODE_SIZE (GET_MODE (inner))
1478 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1479 && REG_P (SUBREG_REG (inner)))
1481 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1482 GET_MODE (SUBREG_REG (inner)),
1483 SUBREG_BYTE (inner),
1484 GET_MODE (inner));
1485 inner = SUBREG_REG (inner);
1487 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1488 abort ();
1489 /* Floating point register pairs are always big endian;
1490 general purpose registers are 64 bit wide. */
1491 regno = REGNO (inner);
1492 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1493 - HARD_REGNO_NREGS (regno, mode))
1494 + offset;
1495 x = inner;
1496 goto reg;
1498 case SIGN_EXTEND:
1499 x = XEXP (x, 0);
1500 goto reg;
1501 /* FIXME: We need this on SHmedia32 because reload generates
1502 some sign-extended HI or QI loads into DImode registers
1503 but, because Pmode is SImode, the address ends up with a
1504 subreg:SI of the DImode register. Maybe reload should be
1505 fixed so as to apply alter_subreg to such loads? */
1506 case IF_THEN_ELSE:
1507 gcc_assert (trapping_target_operand (x, VOIDmode));
1508 x = XEXP (XEXP (x, 2), 0);
1509 goto default_output;
1510 case SUBREG:
1511 gcc_assert (SUBREG_BYTE (x) == 0
1512 && REG_P (SUBREG_REG (x)));
1514 x = SUBREG_REG (x);
1515 /* Fall through. */
1517 reg:
1518 case REG:
1519 regno += REGNO (x);
1520 if (FP_REGISTER_P (regno)
1521 && mode == V16SFmode)
1522 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1523 else if (FP_REGISTER_P (REGNO (x))
1524 && mode == V4SFmode)
1525 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1526 else if (REG_P (x)
1527 && mode == V2SFmode)
1528 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1529 else if (FP_REGISTER_P (REGNO (x))
1530 && GET_MODE_SIZE (mode) > 4)
1531 fprintf ((stream), "d%s", reg_names[regno] + 1);
1532 else
1533 fputs (reg_names[regno], (stream));
1534 break;
1536 case MEM:
1537 output_address (XEXP (x, 0));
1538 break;
1540 default:
1541 if (TARGET_SH1)
1542 fputc ('#', stream);
1543 output_addr_const (stream, x);
1544 break;
1546 break;
1550 static bool
1551 sh_print_operand_punct_valid_p (unsigned char code)
1553 return (code == '.' || code == '#' || code == '@' || code == ','
1554 || code == '$' || code == '\'' || code == '>');
1557 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1558 static bool
1559 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1561 if (GET_CODE (x) == UNSPEC)
1563 switch (XINT (x, 1))
1565 case UNSPEC_DATALABEL:
1566 fputs ("datalabel ", file);
1567 output_addr_const (file, XVECEXP (x, 0, 0));
1568 break;
1569 case UNSPEC_PIC:
1570 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1571 output_addr_const (file, XVECEXP (x, 0, 0));
1572 break;
1573 case UNSPEC_GOT:
1574 output_addr_const (file, XVECEXP (x, 0, 0));
1575 fputs ("@GOT", file);
1576 break;
1577 case UNSPEC_GOTOFF:
1578 output_addr_const (file, XVECEXP (x, 0, 0));
1579 fputs ("@GOTOFF", file);
1580 break;
1581 case UNSPEC_PLT:
1582 output_addr_const (file, XVECEXP (x, 0, 0));
1583 fputs ("@PLT", file);
1584 break;
1585 case UNSPEC_GOTPLT:
1586 output_addr_const (file, XVECEXP (x, 0, 0));
1587 fputs ("@GOTPLT", file);
1588 break;
1589 case UNSPEC_DTPOFF:
1590 output_addr_const (file, XVECEXP (x, 0, 0));
1591 fputs ("@DTPOFF", file);
1592 break;
1593 case UNSPEC_GOTTPOFF:
1594 output_addr_const (file, XVECEXP (x, 0, 0));
1595 fputs ("@GOTTPOFF", file);
1596 break;
1597 case UNSPEC_TPOFF:
1598 output_addr_const (file, XVECEXP (x, 0, 0));
1599 fputs ("@TPOFF", file);
1600 break;
1601 case UNSPEC_CALLER:
1603 char name[32];
1604 /* LPCS stands for Label for PIC Call Site. */
1605 targetm.asm_out.generate_internal_label (name, "LPCS",
1606 INTVAL (XVECEXP (x, 0, 0)));
1607 assemble_name (file, name);
1609 break;
1610 case UNSPEC_EXTRACT_S16:
1611 case UNSPEC_EXTRACT_U16:
1613 rtx val, shift;
1615 val = XVECEXP (x, 0, 0);
1616 shift = XVECEXP (x, 0, 1);
1617 fputc ('(', file);
1618 if (shift != const0_rtx)
1619 fputc ('(', file);
1620 if (GET_CODE (val) == CONST
1621 || GET_RTX_CLASS (GET_CODE (val)) != RTX_OBJ)
1623 fputc ('(', file);
1624 output_addr_const (file, val);
1625 fputc (')', file);
1627 else
1628 output_addr_const (file, val);
1629 if (shift != const0_rtx)
1631 fputs (" >> ", file);
1632 output_addr_const (file, shift);
1633 fputc (')', file);
1635 fputs (" & 65535)", file);
1637 break;
1638 case UNSPEC_SYMOFF:
1639 output_addr_const (file, XVECEXP (x, 0, 0));
1640 fputc ('-', file);
1641 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1643 fputc ('(', file);
1644 output_addr_const (file, XVECEXP (x, 0, 1));
1645 fputc (')', file);
1647 else
1648 output_addr_const (file, XVECEXP (x, 0, 1));
1649 break;
1650 case UNSPEC_PCREL_SYMOFF:
1651 output_addr_const (file, XVECEXP (x, 0, 0));
1652 fputs ("-(", file);
1653 output_addr_const (file, XVECEXP (x, 0, 1));
1654 fputs ("-.)", file);
1655 break;
1656 default:
1657 return false;
1659 return true;
1661 else
1662 return false;
1665 /* Encode symbol attributes of a SYMBOL_REF into its
1666 SYMBOL_REF_FLAGS. */
1667 static void
1668 sh_encode_section_info (tree decl, rtx rtl, int first)
1670 default_encode_section_info (decl, rtl, first);
1672 if (TREE_CODE (decl) == FUNCTION_DECL
1673 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1674 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1677 /* Prepare operands for a move define_expand; specifically, one of the
1678 operands must be in a register. */
1679 void
1680 prepare_move_operands (rtx operands[], enum machine_mode mode)
1682 if ((mode == SImode || mode == DImode)
1683 && flag_pic
1684 && ! ((mode == Pmode || mode == ptr_mode)
1685 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1687 rtx temp;
1688 if (SYMBOLIC_CONST_P (operands[1]))
1690 if (MEM_P (operands[0]))
1691 operands[1] = force_reg (Pmode, operands[1]);
1692 else if (TARGET_SHMEDIA
1693 && GET_CODE (operands[1]) == LABEL_REF
1694 && target_reg_operand (operands[0], mode))
1695 /* It's ok. */;
1696 else
1698 temp = (!can_create_pseudo_p ()
1699 ? operands[0]
1700 : gen_reg_rtx (Pmode));
1701 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1704 else if (GET_CODE (operands[1]) == CONST
1705 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1706 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1708 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1709 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1710 mode, temp);
1711 operands[1] = expand_binop (mode, add_optab, temp,
1712 XEXP (XEXP (operands[1], 0), 1),
1713 (!can_create_pseudo_p ()
1714 ? temp
1715 : gen_reg_rtx (Pmode)),
1716 0, OPTAB_LIB_WIDEN);
1720 if (! reload_in_progress && ! reload_completed)
1722 /* Copy the source to a register if both operands aren't registers. */
1723 if (! register_operand (operands[0], mode)
1724 && ! sh_register_operand (operands[1], mode))
1725 operands[1] = copy_to_mode_reg (mode, operands[1]);
1727 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1729 /* This is like change_address_1 (operands[0], mode, 0, 1),
1730 except that we can't use that function because it is static. */
1731 rtx new_rtx = change_address (operands[0], mode, 0);
1732 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1733 operands[0] = new_rtx;
1736 /* This case can happen while generating code to move the result
1737 of a library call to the target. Reject `st r0,@(rX,rY)' because
1738 reload will fail to find a spill register for rX, since r0 is already
1739 being used for the source. */
1740 else if (TARGET_SH1
1741 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1742 && MEM_P (operands[0])
1743 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1744 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1745 operands[1] = copy_to_mode_reg (mode, operands[1]);
1748 if (mode == Pmode || mode == ptr_mode)
1750 rtx op0, op1, opc;
1751 enum tls_model tls_kind;
1753 op0 = operands[0];
1754 op1 = operands[1];
1755 if (GET_CODE (op1) == CONST
1756 && GET_CODE (XEXP (op1, 0)) == PLUS
1757 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1758 != TLS_MODEL_NONE))
1760 opc = XEXP (XEXP (op1, 0), 1);
1761 op1 = XEXP (XEXP (op1, 0), 0);
1763 else
1764 opc = NULL_RTX;
1766 if (! reload_in_progress && ! reload_completed
1767 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1769 rtx tga_op1, tga_ret, tmp, tmp2;
1771 if (! flag_pic
1772 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1773 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1774 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1776 /* Don't schedule insns for getting GOT address when
1777 the first scheduling pass is enabled, to avoid spill
1778 failures for R0. */
1779 if (flag_schedule_insns)
1780 emit_insn (gen_blockage ());
1781 emit_insn (gen_GOTaddr2picreg ());
1782 emit_use (gen_rtx_REG (SImode, PIC_REG));
1783 if (flag_schedule_insns)
1784 emit_insn (gen_blockage ());
1787 switch (tls_kind)
1789 case TLS_MODEL_GLOBAL_DYNAMIC:
1790 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1791 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1792 tmp = gen_reg_rtx (Pmode);
1793 emit_move_insn (tmp, tga_ret);
1794 op1 = tmp;
1795 break;
1797 case TLS_MODEL_LOCAL_DYNAMIC:
1798 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1799 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1801 tmp = gen_reg_rtx (Pmode);
1802 emit_move_insn (tmp, tga_ret);
1804 if (register_operand (op0, Pmode))
1805 tmp2 = op0;
1806 else
1807 tmp2 = gen_reg_rtx (Pmode);
1809 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1810 op1 = tmp2;
1811 break;
1813 case TLS_MODEL_INITIAL_EXEC:
1814 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1815 tmp = gen_sym2GOTTPOFF (op1);
1816 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1817 op1 = tga_op1;
1818 break;
1820 case TLS_MODEL_LOCAL_EXEC:
1821 tmp2 = gen_reg_rtx (Pmode);
1822 emit_insn (gen_store_gbr (tmp2));
1823 tmp = gen_reg_rtx (Pmode);
1824 emit_insn (gen_symTPOFF2reg (tmp, op1));
1826 if (register_operand (op0, Pmode))
1827 op1 = op0;
1828 else
1829 op1 = gen_reg_rtx (Pmode);
1831 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1832 break;
1834 default:
1835 gcc_unreachable ();
1837 if (opc)
1838 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1839 operands[1] = op1;
1844 /* Implement the canonicalize_comparison target hook for the combine
1845 pass. For the target hook this function is invoked via
1846 sh_canonicalize_comparison. This function is also re-used to
1847 canonicalize comparisons in cbranch pattern expanders. */
1848 static void
1849 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1850 enum machine_mode mode,
1851 bool op0_preserve_value)
1853 /* When invoked from within the combine pass the mode is not specified,
1854 so try to get it from one of the operands. */
1855 if (mode == VOIDmode)
1856 mode = GET_MODE (op0);
1857 if (mode == VOIDmode)
1858 mode = GET_MODE (op1);
1860 // We need to have a mode to do something useful here.
1861 if (mode == VOIDmode)
1862 return;
1864 // Currently, we don't deal with floats here.
1865 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1866 return;
1868 // Make sure that the constant operand is the second operand.
1869 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1871 if (op0_preserve_value)
1872 return;
1874 std::swap (op0, op1);
1875 cmp = swap_condition (cmp);
1878 if (CONST_INT_P (op1))
1880 /* Try to adjust the constant operand in such a way that available
1881 comparison insns can be utilized better and the constant can be
1882 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1883 constant pool. */
1884 const HOST_WIDE_INT val = INTVAL (op1);
1886 /* x > -1 --> x >= 0
1887 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1888 x <= -1 --> x < 0
1889 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1890 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1892 cmp = cmp == GT ? GE : LT;
1893 op1 = gen_int_mode (val + 1, mode);
1896 /* x >= 1 --> x > 0
1897 x >= 0x80 --> x > 0x7F
1898 x < 1 --> x <= 0
1899 x < 0x80 --> x <= 0x7F */
1900 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1902 cmp = cmp == GE ? GT : LE;
1903 op1 = gen_int_mode (val - 1, mode);
1906 /* unsigned x >= 1 --> x != 0
1907 unsigned x < 1 --> x == 0 */
1908 else if (val == 1 && (cmp == GEU || cmp == LTU))
1910 cmp = cmp == GEU ? NE : EQ;
1911 op1 = CONST0_RTX (mode);
1914 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1915 unsigned x < 0x80 --> unsigned x < 0x7F */
1916 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1918 cmp = cmp == GEU ? GTU : LEU;
1919 op1 = gen_int_mode (val - 1, mode);
1922 /* unsigned x > 0 --> x != 0
1923 unsigned x <= 0 --> x == 0 */
1924 else if (val == 0 && (cmp == GTU || cmp == LEU))
1925 cmp = cmp == GTU ? NE : EQ;
1927 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1928 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1929 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1930 && val == 0x7FFFFFFF)
1932 cmp = cmp == GTU ? LT : GE;
1933 op1 = const0_rtx;
1936 /* unsigned x >= 0x80000000 --> signed x < 0
1937 unsigned x < 0x80000000 --> signed x >= 0 */
1938 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1939 && (unsigned HOST_WIDE_INT)val
1940 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1942 cmp = cmp == GEU ? LT : GE;
1943 op1 = const0_rtx;
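/* Two illustrative examples of the rewrites above:

     signed SImode:    x < 1            becomes  x <= 0
     unsigned SImode:  x >= 0x80000000  becomes  (signed) x < 0

   In both cases op1 becomes zero, so no constant has to be loaded
   from the constant pool for the comparison.  */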
1948 /* This function implements the canonicalize_comparison target hook.
1949 This wrapper around the internally used sh_canonicalize_comparison
1950 function is needed to do the enum rtx_code <-> int conversion.
1951 Target hooks cannot use enum rtx_code in its definition. */
1952 static void
1953 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1954 bool op0_preserve_value)
1956 enum rtx_code tmp_code = (enum rtx_code)*code;
1957 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1958 VOIDmode, op0_preserve_value);
1959 *code = (int)tmp_code;
1962 bool
1963 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1965 *p1 = T_REG;
1966 *p2 = INVALID_REGNUM;
1967 return true;
1970 enum rtx_code
1971 prepare_cbranch_operands (rtx *operands, enum machine_mode mode,
1972 enum rtx_code comparison)
1974 /* The scratch reg is only available when this is invoked from within
1975 the cbranchdi4_i splitter, through expand_cbranchdi4. */
1976 rtx scratch = NULL_RTX;
1978 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1979 comparison = GET_CODE (operands[0]);
1980 else
1981 scratch = operands[4];
1983 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1984 mode, false);
1986 /* Notice that this function is also invoked after reload by
1987 the cbranchdi4_i pattern, through expand_cbranchdi4. */
1988 rtx op1 = operands[1];
1990 if (can_create_pseudo_p ())
1991 operands[1] = force_reg (mode, op1);
1992 /* When we are handling DImode comparisons, we want to keep constants so
1993 that we can optimize the component comparisons; however, memory loads
1994 are better issued as a whole so that they can be scheduled well.
1995 SImode equality comparisons allow I08 constants, but only when they
1996 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1997 into a register, that register might as well be r0, and we allow the
1998 constant. If it is already in a register, this is likely to be
1999 allocated to a different hard register, thus we load the constant into
2000 a register unless it is zero. */
2001 if (!REG_P (operands[2])
2002 && (!CONST_INT_P (operands[2])
2003 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2004 && ((comparison != EQ && comparison != NE)
2005 || (REG_P (op1) && REGNO (op1) != R0_REG)
2006 || !satisfies_constraint_I08 (operands[2])))))
2008 if (scratch && GET_MODE (scratch) == mode)
2010 emit_move_insn (scratch, operands[2]);
2011 operands[2] = scratch;
2013 else if (can_create_pseudo_p ())
2014 operands[2] = force_reg (mode, operands[2]);
2016 return comparison;
2019 void
2020 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, int probability)
2022 rtx (*branch_expander) (rtx) = gen_branch_true;
2023 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2024 switch (comparison)
2026 case NE: case LT: case LE: case LTU: case LEU:
2027 comparison = reverse_condition (comparison);
2028 branch_expander = gen_branch_false;
2029 default: ;
2031 emit_insn (gen_rtx_SET (VOIDmode, get_t_reg_rtx (),
2032 gen_rtx_fmt_ee (comparison, SImode,
2033 operands[1], operands[2])));
2034 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2035 if (probability >= 0)
2036 add_int_reg_note (jump, REG_BR_PROB, probability);
2039 /* ??? How should we distribute probabilities when more than one branch
2040 is generated?  So far we only have some ad-hoc observations:
2041 - If the operands are random, they are likely to differ in both parts.
2042 - If comparing items in a hash chain, the operands are random or equal;
2043 operation should be EQ or NE.
2044 - If items are searched in an ordered tree from the root, we can expect
2045 the highpart to be unequal about half of the time; operation should be
2046 an inequality comparison, operands non-constant, and overall probability
2047 about 50%. Likewise for quicksort.
2048 - Range checks will often be made against constants. Even if we assume for
2049 simplicity an even distribution of the non-constant operand over a
2050 sub-range here, the same probability could be generated with differently
2051 wide sub-ranges - as long as the ratio of the part of the subrange that
2052 is before the threshold to the part that comes after the threshold stays
2053 the same. Thus, we can't really tell anything here;
2054 assuming random distribution is at least simple.
2056 bool
2057 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2059 enum rtx_code msw_taken, msw_skip, lsw_taken;
2060 rtx_code_label *skip_label = NULL;
2061 rtx op1h, op1l, op2h, op2l;
2062 int num_branches;
2063 int prob, rev_prob;
2064 int msw_taken_prob = -1, msw_skip_prob = -1, lsw_taken_prob = -1;
2065 rtx scratch = operands[4];
2067 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2068 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2069 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2070 op1l = gen_lowpart (SImode, operands[1]);
2071 op2l = gen_lowpart (SImode, operands[2]);
2072 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2073 prob = split_branch_probability;
2074 rev_prob = REG_BR_PROB_BASE - prob;
2075 switch (comparison)
2077 /* ??? Should we use the cmpeqdi_t pattern for equality comparisons?
2078 That costs 1 cycle more when the first branch can be predicted taken,
2079 but saves us mispredicts because only one branch needs prediction.
2080 It also enables generating the cmpeqdi_t-1 pattern. */
2081 case EQ:
2082 if (TARGET_CMPEQDI_T)
2084 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2085 emit_jump_insn (gen_branch_true (operands[3]));
2086 return true;
2088 msw_skip = NE;
2089 lsw_taken = EQ;
2090 if (prob >= 0)
2092 // If we had more precision, we'd use rev_prob - (rev_prob >> 32) .
2093 msw_skip_prob = rev_prob;
2094 if (REG_BR_PROB_BASE <= 65535)
2095 lsw_taken_prob = prob ? REG_BR_PROB_BASE : 0;
2096 else
2098 lsw_taken_prob
2099 = (prob
2100 ? (REG_BR_PROB_BASE
2101 - ((gcov_type) REG_BR_PROB_BASE * rev_prob
2102 / ((gcov_type) prob << 32)))
2103 : 0);
2106 break;
2107 case NE:
2108 if (TARGET_CMPEQDI_T)
2110 emit_insn (gen_cmpeqdi_t (operands[1], operands[2]));
2111 emit_jump_insn (gen_branch_false (operands[3]));
2112 return true;
2114 msw_taken = NE;
2115 msw_taken_prob = prob;
2116 lsw_taken = NE;
2117 lsw_taken_prob = 0;
2118 break;
2119 case GTU: case GT:
2120 msw_taken = comparison;
2121 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2122 break;
2123 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2124 msw_skip = swap_condition (msw_taken);
2125 lsw_taken = GTU;
2126 break;
2127 case GEU: case GE:
2128 if (op2l == CONST0_RTX (SImode))
2129 msw_taken = comparison;
2130 else
2132 msw_taken = comparison == GE ? GT : GTU;
2133 msw_skip = swap_condition (msw_taken);
2134 lsw_taken = GEU;
2136 break;
2137 case LTU: case LT:
2138 msw_taken = comparison;
2139 if (op2l == CONST0_RTX (SImode))
2140 break;
2141 msw_skip = swap_condition (msw_taken);
2142 lsw_taken = LTU;
2143 break;
2144 case LEU: case LE:
2145 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2146 msw_taken = comparison;
2147 else
2149 lsw_taken = LEU;
2150 if (comparison == LE)
2151 msw_taken = LT;
2152 else if (op2h != CONST0_RTX (SImode))
2153 msw_taken = LTU;
2154 else
2156 msw_skip = swap_condition (LTU);
2157 break;
2159 msw_skip = swap_condition (msw_taken);
2161 break;
2162 default: return false;
2164 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2165 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2166 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2167 if (comparison != EQ && comparison != NE && num_branches > 1)
2169 if (!CONSTANT_P (operands[2])
2170 && prob >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2171 && prob <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2173 msw_taken_prob = prob / 2U;
2174 msw_skip_prob
2175 = REG_BR_PROB_BASE * rev_prob / (REG_BR_PROB_BASE + rev_prob);
2176 lsw_taken_prob = prob;
2178 else
2180 msw_taken_prob = prob;
2181 msw_skip_prob = REG_BR_PROB_BASE;
2182 /* ??? If we have a constant op2h, should we use that when
2183 calculating lsw_taken_prob? */
2184 lsw_taken_prob = prob;
2187 operands[1] = op1h;
2188 operands[2] = op2h;
2189 operands[4] = NULL_RTX;
2190 if (reload_completed
2191 && ! arith_reg_or_0_operand (op2h, SImode)
2192 && (true_regnum (op1h) || (comparison != EQ && comparison != NE))
2193 && (msw_taken != LAST_AND_UNUSED_RTX_CODE
2194 || msw_skip != LAST_AND_UNUSED_RTX_CODE))
2196 emit_move_insn (scratch, operands[2]);
2197 operands[2] = scratch;
2199 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2200 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2201 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2203 rtx taken_label = operands[3];
2205 /* Operands were possibly modified, but msw_skip doesn't expect this.
2206 Always use the original ones. */
2207 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2209 operands[1] = op1h;
2210 operands[2] = op2h;
2211 if (reload_completed
2212 && ! arith_reg_or_0_operand (op2h, SImode)
2213 && (true_regnum (op1h) || (comparison != EQ && comparison != NE)))
2215 emit_move_insn (scratch, operands[2]);
2216 operands[2] = scratch;
2220 operands[3] = skip_label = gen_label_rtx ();
2221 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2222 operands[3] = taken_label;
2224 operands[1] = op1l;
2225 operands[2] = op2l;
2226 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2228 if (reload_completed
2229 && ! arith_reg_or_0_operand (op2l, SImode)
2230 && (true_regnum (op1l) || (lsw_taken != EQ && lsw_taken != NE)))
2232 emit_move_insn (scratch, operands[2]);
2233 operands[2] = scratch;
2235 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2237 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2238 emit_label (skip_label);
2239 return true;
2242 /* Given an operand, return 1 if the evaluated operand plugged into an
2243 if_then_else will result in a branch_true, 0 if branch_false, or
2244 -1 if neither applies. The truth table goes like this:
2246 op | cmpval | code | result
2247 ---------+--------+---------+--------------------
2248 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2249 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2250 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2251 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2252 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2253 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2254 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2255 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2257 sh_eval_treg_value (rtx op)
2259 if (t_reg_operand (op, GET_MODE (op)))
2260 return 1;
2261 if (negt_reg_operand (op, GET_MODE (op)))
2262 return 0;
2264 rtx_code code = GET_CODE (op);
2265 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2266 return -1;
2268 int cmpop = code == EQ ? 1 : 0;
2269 int cmpval = INTVAL (XEXP (op, 1));
2270 if (cmpval != 0 && cmpval != 1)
2271 return -1;
2273 int t;
2274 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2275 t = 0;
2276 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2277 t = 1;
2278 else
2279 return -1;
2281 return t ^ (cmpval == cmpop);
2284 /* Emit INSN, possibly in a PARALLEL with a USE of fpscr for SH4. */
2286 static void
2287 sh_emit_set_t_insn (rtx insn, enum machine_mode mode)
2289 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
2291 insn = gen_rtx_PARALLEL (VOIDmode,
2292 gen_rtvec (2, insn,
2293 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
2294 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
2296 else
2297 emit_insn (insn);
2300 /* Prepare the operands for an scc instruction; make sure that the
2301 compare has been done and the result is in T_REG. */
2302 void
2303 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2305 rtx t_reg = get_t_reg_rtx ();
2306 enum rtx_code oldcode = code;
2307 enum machine_mode mode;
2309 /* First need a compare insn. */
2310 switch (code)
2312 case NE:
2313 /* It isn't possible to handle this case. */
2314 gcc_unreachable ();
2315 case LT:
2316 code = GT;
2317 break;
2318 case LE:
2319 code = GE;
2320 break;
2321 case LTU:
2322 code = GTU;
2323 break;
2324 case LEU:
2325 code = GEU;
2326 break;
2327 default:
2328 break;
2330 if (code != oldcode)
2332 rtx tmp = op0;
2333 op0 = op1;
2334 op1 = tmp;
2337 mode = GET_MODE (op0);
2338 if (mode == VOIDmode)
2339 mode = GET_MODE (op1);
2341 op0 = force_reg (mode, op0);
2342 if ((code != EQ && code != NE
2343 && (op1 != const0_rtx
2344 || code == GTU || code == GEU || code == LTU || code == LEU))
2345 || (mode == DImode && op1 != const0_rtx)
2346 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2347 op1 = force_reg (mode, op1);
2349 sh_emit_set_t_insn (gen_rtx_SET (VOIDmode, t_reg,
2350 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2351 mode);
2355 sh_emit_cheap_store_flag (enum machine_mode mode, enum rtx_code code,
2356 rtx op0, rtx op1)
2358 rtx target = gen_reg_rtx (SImode);
2359 rtx tmp;
2361 gcc_assert (TARGET_SHMEDIA);
2362 switch (code)
2364 case EQ:
2365 case GT:
2366 case LT:
2367 case UNORDERED:
2368 case GTU:
2369 case LTU:
2370 tmp = gen_rtx_fmt_ee (code, SImode, op0, op1);
2371 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2372 code = NE;
2373 break;
2375 case NE:
2376 case GE:
2377 case LE:
2378 case ORDERED:
2379 case GEU:
2380 case LEU:
2381 tmp = gen_rtx_fmt_ee (reverse_condition (code), mode, op0, op1);
2382 emit_insn (gen_cstore4_media (target, tmp, op0, op1));
2383 code = EQ;
2384 break;
2386 case UNEQ:
2387 case UNGE:
2388 case UNGT:
2389 case UNLE:
2390 case UNLT:
2391 case LTGT:
2392 return NULL_RTX;
2394 default:
2395 gcc_unreachable ();
2398 if (mode == DImode)
2400 rtx t2 = gen_reg_rtx (DImode);
2401 emit_insn (gen_extendsidi2 (t2, target));
2402 target = t2;
2405 return gen_rtx_fmt_ee (code, VOIDmode, target, const0_rtx);
2408 /* Called from the md file, set up the operands of a compare instruction. */
2409 void
2410 sh_emit_compare_and_branch (rtx *operands, enum machine_mode mode)
2412 enum rtx_code code = GET_CODE (operands[0]);
2413 enum rtx_code branch_code;
2414 rtx op0 = operands[1];
2415 rtx op1 = operands[2];
2416 rtx insn, tem;
2417 bool need_ccmpeq = false;
2419 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2421 op0 = force_reg (mode, op0);
2422 op1 = force_reg (mode, op1);
2424 else
2426 if (code != EQ || mode == DImode)
2428 /* Force args into regs, since we can't use constants here. */
2429 op0 = force_reg (mode, op0);
2430 if (op1 != const0_rtx || code == GTU || code == GEU)
2431 op1 = force_reg (mode, op1);
2435 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2437 if (code == LT
2438 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2439 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2441 tem = op0, op0 = op1, op1 = tem;
2442 code = swap_condition (code);
2445 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2446 if (code == GE)
2448 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2449 need_ccmpeq = true;
2450 code = GT;
2453 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2454 to EQ/GT respectively. */
2455 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2458 switch (code)
2460 case EQ:
2461 case GT:
2462 case GE:
2463 case GTU:
2464 case GEU:
2465 branch_code = code;
2466 break;
2467 case NE:
2468 case LT:
2469 case LE:
2470 case LTU:
2471 case LEU:
2472 branch_code = reverse_condition (code);
2473 break;
2474 default:
2475 gcc_unreachable ();
2478 insn = gen_rtx_SET (VOIDmode,
2479 get_t_reg_rtx (),
2480 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2482 sh_emit_set_t_insn (insn, mode);
2483 if (need_ccmpeq)
2484 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2486 if (branch_code == code)
2487 emit_jump_insn (gen_branch_true (operands[3]));
2488 else
2489 emit_jump_insn (gen_branch_false (operands[3]));
2492 void
2493 sh_emit_compare_and_set (rtx *operands, enum machine_mode mode)
2495 enum rtx_code code = GET_CODE (operands[1]);
2496 rtx op0 = operands[2];
2497 rtx op1 = operands[3];
2498 rtx_code_label *lab = NULL;
2499 bool invert = false;
2500 rtx tem;
2502 op0 = force_reg (mode, op0);
2503 if ((code != EQ && code != NE
2504 && (op1 != const0_rtx
2505 || code == GTU || code == GEU || code == LTU || code == LEU))
2506 || (mode == DImode && op1 != const0_rtx)
2507 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2508 op1 = force_reg (mode, op1);
2510 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2512 if (code == LT || code == LE)
2514 code = swap_condition (code);
2515 tem = op0, op0 = op1, op1 = tem;
2517 if (code == GE)
2519 if (TARGET_IEEE)
2521 lab = gen_label_rtx ();
2522 sh_emit_scc_to_t (EQ, op0, op1);
2523 emit_jump_insn (gen_branch_true (lab));
2524 code = GT;
2526 else
2528 code = LT;
2529 invert = true;
2534 if (code == NE)
2536 code = EQ;
2537 invert = true;
2540 sh_emit_scc_to_t (code, op0, op1);
2541 if (lab)
2542 emit_label (lab);
2543 if (invert)
2544 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2545 else
2546 emit_move_insn (operands[0], get_t_reg_rtx ());
2549 /* Functions to output assembly code. */
2551 /* Return a sequence of instructions to perform DI or DF move.
2553 Since the SH cannot move a DI or DF in one instruction, we have
2554 to take care when we see overlapping source and dest registers. */
2555 const char *
2556 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2557 enum machine_mode mode)
2559 rtx dst = operands[0];
2560 rtx src = operands[1];
2562 if (MEM_P (dst)
2563 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2564 return "mov.l %T1,%0" "\n"
2565 " mov.l %1,%0";
2567 if (register_operand (dst, mode)
2568 && register_operand (src, mode))
2570 if (REGNO (src) == MACH_REG)
2571 return "sts mach,%S0" "\n"
2572 " sts macl,%R0";
2574 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2575 when mov.d r1,r0 do r1->r0 then r2->r1. */
2576 if (REGNO (src) + 1 == REGNO (dst))
2577 return "mov %T1,%T0" "\n"
2578 " mov %1,%0";
2579 else
2580 return "mov %1,%0" "\n"
2581 " mov %T1,%T0";
2583 else if (CONST_INT_P (src))
2585 if (INTVAL (src) < 0)
2586 output_asm_insn ("mov #-1,%S0", operands);
2587 else
2588 output_asm_insn ("mov #0,%S0", operands);
2590 return "mov %1,%R0";
2592 else if (MEM_P (src))
2594 int ptrreg = -1;
2595 int dreg = REGNO (dst);
2596 rtx inside = XEXP (src, 0);
2598 switch (GET_CODE (inside))
2600 case REG:
2601 ptrreg = REGNO (inside);
2602 break;
2604 case SUBREG:
2605 ptrreg = subreg_regno (inside);
2606 break;
2608 case PLUS:
2609 ptrreg = REGNO (XEXP (inside, 0));
2610 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2611 an offsettable address. Unfortunately, offsettable addresses use
2612 QImode to check the offset, and a QImode offsettable address
2613 requires r0 for the other operand, which is not currently
2614 supported, so we can't use the 'o' constraint.
2615 Thus we must check for and handle r0+REG addresses here.
2616 We punt for now, since this is likely very rare. */
2617 gcc_assert (!REG_P (XEXP (inside, 1)));
2618 break;
2620 case LABEL_REF:
2621 return "mov.l %1,%0" "\n"
2622 " mov.l %1+4,%T0";
2623 case POST_INC:
2624 return "mov.l %1,%0" "\n"
2625 " mov.l %1,%T0";
2626 default:
2627 gcc_unreachable ();
2630 /* Work out the safe way to copy. Copy into the second half first. */
2631 if (dreg == ptrreg)
2632 return "mov.l %T1,%T0" "\n"
2633 " mov.l %1,%0";
2636 return "mov.l %1,%0" "\n"
2637 " mov.l %T1,%T0";
2640 /* Print an instruction which would have gone into a delay slot after
2641 another instruction, but couldn't because the other instruction expanded
2642 into a sequence where putting the slot insn at the end wouldn't work. */
2643 static void
2644 print_slot (rtx_sequence *seq)
2646 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2648 INSN_DELETED_P (seq->insn (1)) = 1;
2651 const char *
2652 output_far_jump (rtx_insn *insn, rtx op)
2654 struct { rtx lab, reg, op; } this_jmp;
2655 rtx_code_label *braf_base_lab = NULL;
2656 const char *jump;
2657 int far;
2658 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2659 rtx_insn *prev;
2661 this_jmp.lab = gen_label_rtx ();
2663 if (TARGET_SH2
2664 && offset >= -32764
2665 && offset - get_attr_length (insn) <= 32766)
2667 far = 0;
2668 jump = "mov.w %O0,%1" "\n"
2669 " braf %1";
2671 else
2673 far = 1;
2674 if (flag_pic)
2676 if (TARGET_SH2)
2677 jump = "mov.l %O0,%1" "\n"
2678 " braf %1";
2679 else
2680 jump = "mov.l r0,@-r15" "\n"
2681 " mova %O0,r0" "\n"
2682 " mov.l @r0,%1" "\n"
2683 " add r0,%1" "\n"
2684 " mov.l @r15+,r0" "\n"
2685 " jmp @%1";
2687 else
2688 jump = "mov.l %O0,%1" "\n"
2689 " jmp @%1";
2691 /* If we have a scratch register available, use it. */
2692 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2693 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2695 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2696 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2697 jump = "mov.l r1,@-r15" "\n"
2698 " mova %O0,r0" "\n"
2699 " mov.l @r0,r1" "\n"
2700 " add r1,r0" "\n"
2701 " mov.l @r15+,r1" "\n"
2702 " jmp @%1";
2703 output_asm_insn (jump, &this_jmp.lab);
2704 if (dbr_sequence_length ())
2705 print_slot (final_sequence);
2706 else
2707 output_asm_insn ("nop", 0);
2709 else
2711 /* Output the delay slot insn first if any. */
2712 if (dbr_sequence_length ())
2713 print_slot (final_sequence);
2715 this_jmp.reg = gen_rtx_REG (SImode, 13);
2716 /* We must keep the stack aligned to 8-byte boundaries on SH5.
2717 Fortunately, MACL is fixed and call-clobbered, and we never
2718 need its value across jumps, so save r13 in it instead of on
2719 the stack. */
2720 if (TARGET_SH5)
2721 output_asm_insn ("lds r13,macl", 0);
2722 else
2723 output_asm_insn ("mov.l r13,@-r15", 0);
2724 output_asm_insn (jump, &this_jmp.lab);
2725 if (TARGET_SH5)
2726 output_asm_insn ("sts macl,r13", 0);
2727 else
2728 output_asm_insn ("mov.l @r15+,r13", 0);
2730 if (far && flag_pic && TARGET_SH2)
2732 braf_base_lab = gen_label_rtx ();
2733 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2734 CODE_LABEL_NUMBER (braf_base_lab));
2736 if (far)
2737 output_asm_insn (".align 2", 0);
2738 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2739 this_jmp.op = op;
2740 if (far && flag_pic)
2742 if (TARGET_SH2)
2743 this_jmp.lab = braf_base_lab;
2744 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2746 else
2747 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2748 return "";
2751 /* Local label counter, used for constants in the pool and inside
2752 pattern branches. */
2753 static int lf = 100;
2755 /* Output code for ordinary branches. */
2756 const char *
2757 output_branch (int logic, rtx insn, rtx *operands)
2759 switch (get_attr_length (insn))
2761 case 6:
2762 /* This can happen if filling the delay slot has caused a forward
2763 branch to exceed its range (we could reverse it, but only
2764 when we know we won't overextend other branches; this should
2765 best be handled by relaxation).
2766 It can also happen when other condbranches hoist delay slot insns
2767 from their destination, thus leading to code size increase.
2768 But the branch will still be in the range -4092..+4098 bytes. */
2769 if (! TARGET_RELAX)
2771 int label = lf++;
2772 /* The call to print_slot will clobber the operands. */
2773 rtx op0 = operands[0];
2775 /* If the instruction in the delay slot is annulled (true), then
2776 there is no delay slot where we can put it now. The only safe
2777 place for it is after the label. final will do that by default. */
2779 if (final_sequence
2780 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
2781 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
2783 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2784 ASSEMBLER_DIALECT ? "/" : ".", label);
2785 print_slot (final_sequence);
2787 else
2788 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2790 output_asm_insn ("bra\t%l0", &op0);
2791 fprintf (asm_out_file, "\tnop\n");
2792 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2794 return "";
2796 /* When relaxing, handle this like a short branch. The linker
2797 will fix it up if it still doesn't fit after relaxation. */
2798 case 2:
2799 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2801 /* These are for SH2e, in which we have to account for the
2802 extra nop because of the hardware bug in annulled branches. */
2803 case 8:
2804 if (! TARGET_RELAX)
2806 int label = lf++;
2808 gcc_assert (!final_sequence
2809 || !(INSN_ANNULLED_BRANCH_P
2810 (XVECEXP (final_sequence, 0, 0))));
2811 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2812 logic ? "f" : "t",
2813 ASSEMBLER_DIALECT ? "/" : ".", label);
2814 fprintf (asm_out_file, "\tnop\n");
2815 output_asm_insn ("bra\t%l0", operands);
2816 fprintf (asm_out_file, "\tnop\n");
2817 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2819 return "";
2821 /* When relaxing, fall through. */
2822 case 4:
2824 char buffer[10];
2826 sprintf (buffer, "b%s%ss\t%%l0",
2827 logic ? "t" : "f",
2828 ASSEMBLER_DIALECT ? "/" : ".");
2829 output_asm_insn (buffer, &operands[0]);
2830 return "nop";
2833 default:
2834 /* There should be no longer branches now - that would
2835 indicate that something has destroyed the branches set
2836 up in machine_dependent_reorg. */
2837 gcc_unreachable ();
2841 /* Output a code sequence for INSN using TEMPL with OPERANDS; but first,
2842 fill in operand 9 as a label to the successor insn.
2843 We try to use jump threading where possible.
2844 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2845 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2846 follow jmp and bt, if the address is in range. */
2847 const char *
2848 output_branchy_insn (enum rtx_code code, const char *templ,
2849 rtx_insn *insn, rtx *operands)
2851 rtx_insn *next_insn = NEXT_INSN (insn);
2853 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2855 rtx src = SET_SRC (PATTERN (next_insn));
2856 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2858 /* Following branch not taken */
2859 rtx_code_label *lab = gen_label_rtx ();
2860 emit_label_after (lab, next_insn);
2861 INSN_ADDRESSES_NEW (lab,
2862 INSN_ADDRESSES (INSN_UID (next_insn))
2863 + get_attr_length (next_insn));
2864 operands[9] = lab;
2865 return templ;
2867 else
2869 int offset = (branch_dest (next_insn)
2870 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2871 if (offset >= -252 && offset <= 258)
2873 if (GET_CODE (src) == IF_THEN_ELSE)
2874 /* branch_true */
2875 src = XEXP (src, 1);
2876 operands[9] = src;
2877 return templ;
2881 rtx_code_label *lab = gen_label_rtx ();
2882 emit_label_after (lab, insn);
2883 INSN_ADDRESSES_NEW (lab,
2884 INSN_ADDRESSES (INSN_UID (insn))
2885 + get_attr_length (insn));
2886 operands[9] = lab;
2887 return templ;
2890 const char *
2891 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2893 return output_branchy_insn (NE, "bt %l9" "\n"
2894 " fcmp/eq %1,%0",
2895 insn, operands);
2898 /* Output the start of the assembler file. */
2899 static void
2900 sh_file_start (void)
2902 default_file_start ();
2904 if (TARGET_ELF)
2905 /* We need to show the text section with the proper
2906 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2907 emits it without attributes, else GAS
2908 will complain. We can teach GAS specifically about the
2909 default attributes for our choice of text section, but
2910 then we would have to change GAS again if/when we change
2911 the text section name. */
2912 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2913 else
2914 /* Switch to the data section so that the coffsem symbol
2915 isn't in the text section. */
2916 switch_to_section (data_section);
2918 if (TARGET_LITTLE_ENDIAN)
2919 fputs ("\t.little\n", asm_out_file);
2921 if (!TARGET_ELF)
2923 if (TARGET_SHCOMPACT)
2924 fputs ("\t.mode\tSHcompact\n", asm_out_file);
2925 else if (TARGET_SHMEDIA)
2926 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
2927 TARGET_SHMEDIA64 ? 64 : 32);
2931 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2932 static bool
2933 unspec_caller_rtx_p (rtx pat)
2935 rtx base, offset;
2936 int i;
2938 split_const (pat, &base, &offset);
2939 if (GET_CODE (base) == UNSPEC)
2941 if (XINT (base, 1) == UNSPEC_CALLER)
2942 return true;
2943 for (i = 0; i < XVECLEN (base, 0); i++)
2944 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2945 return true;
2947 return false;
2950 /* Indicate that INSN cannot be duplicated. This is true for insns
2951 that generate a unique label. */
2952 static bool
2953 sh_cannot_copy_insn_p (rtx_insn *insn)
2955 rtx pat;
2957 if (!reload_completed || !flag_pic)
2958 return false;
2960 if (!NONJUMP_INSN_P (insn))
2961 return false;
2962 if (asm_noperands (insn) >= 0)
2963 return false;
2965 pat = PATTERN (insn);
2966 if (GET_CODE (pat) != SET)
2967 return false;
2968 pat = SET_SRC (pat);
2970 if (unspec_caller_rtx_p (pat))
2971 return true;
2973 return false;
2976 /* Number of instructions used to make an arithmetic right shift by N. */
2977 static const char ashiftrt_insns[] =
2978 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2980 /* Description of a logical left or right shift, when expanded to a sequence
2981 of 1/2/8/16 shifts.
2982 Notice that one bit right shifts clobber the T bit. One bit left shifts
2983 are done with an 'add Rn,Rn' insn and thus do not clobber the T bit. */
2984 enum
2986 ASHL_CLOBBERS_T = 1 << 0,
2987 LSHR_CLOBBERS_T = 1 << 1
2990 struct ashl_lshr_sequence
2992 char insn_count;
2993 char amount[6];
2994 char clobbers_t;
2997 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2999 { 0, { 0 }, 0 }, // 0
3000 { 1, { 1 }, LSHR_CLOBBERS_T },
3001 { 1, { 2 }, 0 },
3002 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3003 { 2, { 2, 2 }, 0 }, // 4
3004 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3005 { 3, { 2, 2, 2 }, 0 },
3006 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
3007 { 1, { 8 }, 0 }, // 8
3008 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3009 { 2, { 8, 2 }, 0 },
3010 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3011 { 3, { 8, 2, 2 }, 0 }, // 12
3012 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
3013 { 3, { 8, -2, 8 }, 0 },
3014 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
3015 { 1, { 16 }, 0 }, // 16
3016 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3017 { 2, { 16, 2 }, 0 },
3018 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3019 { 3, { 16, 2, 2 }, 0 }, // 20
3020 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3021 { 3, { 16, -2, 8 }, 0 },
3022 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3023 { 2, { 16, 8 }, 0 }, // 24
3024 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3025 { 3, { 16, 8, 2 }, 0 },
3026 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3027 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3028 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3029 { 3, { 16, -2, 16 }, 0 },
3031 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
3032 For a left shift by 31 a 2 insn and-rotl sequence can be used.
3033 However, the shift-and combiner code needs this entry here to be in
3034 terms of real shift insns. */
3035 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
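/* Reading the table above, e.g. entry 13 = { 4, { 8, 2, 1, 2 }, ... }:
   a shift by 13 is composed of 8 + 2 + 1 + 2 partial shifts (four insns),
   and the logical right shift variant clobbers T because of the one-bit
   step.  Negative amounts shift back in the opposite direction, as in
   entry 14 = { 3, { 8, -2, 8 }, 0 }, where 8 - 2 + 8 = 14.  */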
3038 /* Shift sequences for shift amounts < 16, where up to the three highmost
3039 bits might be clobbered. This is typically used when combined with some
3040 kind of sign or zero extension. */
3041 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
3043 { 0, { 0 }, 0 }, // 0
3044 { 1, { 1 }, LSHR_CLOBBERS_T },
3045 { 1, { 2 }, 0 },
3046 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
3047 { 2, { 2, 2 }, 0 }, // 4
3048 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
3049 { 2, { 8, -2 }, 0 },
3050 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
3051 { 1, { 8 }, 0 }, // 8
3052 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
3053 { 2, { 8, 2 }, 0 },
3054 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
3055 { 3, { 8, 2, 2 }, 0 }, // 12
3056 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
3057 { 2, { 16, -2 }, 0 },
3058 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
3059 { 1, { 16 }, 0 }, // 16
3060 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
3061 { 2, { 16, 2 }, 0 },
3062 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
3063 { 3, { 16, 2, 2 }, 0 }, // 20
3064 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
3065 { 3, { 16, -2, 8 }, 0 },
3066 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
3067 { 2, { 16, 8 }, 0 }, // 24
3068 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
3069 { 3, { 16, 8, 2 }, 0 },
3070 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3071 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3072 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3073 { 3, { 16, -2, 16 }, 0 },
3074 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3077 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3078 will clobber the T bit. */
3079 bool
3080 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3082 gcc_assert (CONST_INT_P (shift_amount));
3084 const int shift_amount_i = INTVAL (shift_amount) & 31;
3086 /* Special case for shift count of 31: use and-rotl sequence. */
3087 if (shift_amount_i == 31)
3088 return true;
3090 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3091 & ASHL_CLOBBERS_T) != 0;
3094 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3095 instructions will clobber the T bit. */
3096 bool
3097 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3099 gcc_assert (CONST_INT_P (shift_amount));
3101 const int shift_amount_i = INTVAL (shift_amount) & 31;
3103 /* Special case for shift count of 31: use shll-movt sequence. */
3104 if (shift_amount_i == 31)
3105 return true;
3107 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3108 & LSHR_CLOBBERS_T) != 0;
3111 /* Return true if it is potentially beneficial to use a dynamic shift
3112 instruction (shad / shar) instead of a combination of 1/2/8/16
3113 shift instructions for the specified shift count.
3114 If dynamic shifts are not available, always return false. */
3115 bool
3116 sh_dynamicalize_shift_p (rtx count)
3118 gcc_assert (CONST_INT_P (count));
3120 const int shift_amount_i = INTVAL (count) & 31;
3121 int insn_count;
3123 /* For left and right shifts, there are shorter 2 insn sequences for
3124 shift amounts of 31. */
3125 if (shift_amount_i == 31)
3126 insn_count = 2;
3127 else
3128 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3130 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
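/* For example (illustrative): a shift by 13 takes four insns according to
   ashl_lshr_seq above, so when dynamic shifts are available it is
   dynamicalized whenever 4 > 1 + SH_DYNAMIC_SHIFT_COST, i.e. a mov #imm
   plus shad / shar pair is preferred.  A shift by 8 or 16 is a single
   insn and is never dynamicalized.  */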
3133 /* Assuming we have a value that has been sign-extended by at least one bit,
3134 can we use the ext_shift_amounts with the last shift turned to an
3135 arithmetic shift to shift it by N without data loss, and quicker than by
3136 other means? */
3137 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3139 /* Return the cost of a shift. */
3140 static inline int
3141 shiftcosts (rtx x)
3143 int value;
3145 if (TARGET_SHMEDIA)
3146 return 1;
3148 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3150 if (GET_MODE (x) == DImode
3151 && CONST_INT_P (XEXP (x, 1))
3152 && INTVAL (XEXP (x, 1)) == 1)
3153 return 2;
3155 /* Everything else is invalid, because there is no pattern for it. */
3156 return -1;
3158 /* If shift by a non constant, then this will be expensive. */
3159 if (!CONST_INT_P (XEXP (x, 1)))
3160 return SH_DYNAMIC_SHIFT_COST;
3162 /* Otherwise, return the true cost in instructions. Cope with out of range
3163 shift counts more or less arbitrarily. */
3164 value = INTVAL (XEXP (x, 1)) & 31;
3166 if (GET_CODE (x) == ASHIFTRT)
3168 int cost = ashiftrt_insns[value];
3169 /* If dynamic shifts are available and profitable in this case, then we
3170 put the constant in a reg and use shad. */
3171 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3172 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3173 return cost;
3175 else
3176 return ashl_lshr_seq[value].insn_count;
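/* Example costs (illustrative): an ASHIFT by 16 costs one insn and a shift
   by 13 costs four (see ashl_lshr_seq above), while an ASHIFTRT by 6 would
   cost ashiftrt_insns[6] == 8 insns and is capped to
   1 + SH_DYNAMIC_SHIFT_COST when that is cheaper.  */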
3179 /* Return the cost of an AND/XOR/IOR operation. */
3180 static inline int
3181 and_xor_ior_costs (rtx x, int code)
3183 /* On SH1-4 we have only max. SImode operations.
3184 Double the cost for modes > SImode. */
3185 const int cost_scale = !TARGET_SHMEDIA
3186 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3187 ? 2 : 1;
3189 /* A logical operation with two registers is a single cycle
3190 instruction. */
3191 if (!CONST_INT_P (XEXP (x, 1)))
3192 return 1 * cost_scale;
3194 int i = INTVAL (XEXP (x, 1));
3196 if (TARGET_SHMEDIA)
3198 if (satisfies_constraint_I10 (XEXP (x, 1))
3199 || satisfies_constraint_J16 (XEXP (x, 1)))
3200 return 1;
3201 else
3202 return 1 + rtx_cost (XEXP (x, 1), AND, 1, !optimize_size);
3205 /* These constants are single cycle extu.[bw] instructions. */
3206 if ((i == 0xff || i == 0xffff) && code == AND)
3207 return 1 * cost_scale;
3208 /* Constants that can be used in an instruction as an immediate are
3209 a single cycle, but this requires r0, so make it a little more
3210 expensive. */
3211 if (CONST_OK_FOR_K08 (i))
3212 return 2 * cost_scale;
3213 /* Constants that can be loaded with a mov immediate need one more cycle.
3214 This case is probably unnecessary. */
3215 if (CONST_OK_FOR_I08 (i))
3216 return 2 * cost_scale;
3217 /* Any other constant requires an additional 2 cycle pc-relative load.
3218 This case is probably unnecessary. */
3219 return 3 * cost_scale;
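/* Example costs for SImode AND (illustrative): 'x & 0xff' costs 1 (a single
   extu.b), an immediate satisfying K08 costs 2 because it ties up r0, and a
   constant that fits neither I08 nor K08, e.g. 0x12345, costs 3 due to the
   pc-relative constant load.  */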
3222 /* Return the cost of an addition or a subtraction. */
3223 static inline int
3224 addsubcosts (rtx x)
3226 if (GET_MODE (x) == SImode)
3228 /* The addc or subc patterns will eventually become one or two
3229 instructions. Below are some costs for some of the patterns
3230 which combine would reject because the costs of the individual
3231 insns in the patterns are lower.
3233 FIXME: It would be much easier if we had something like insn cost
3234 attributes and the cost calculation machinery used those attributes
3235 in the first place. This would eliminate redundant recog-like C
3236 code to calculate costs of complex patterns. */
3237 rtx op0 = XEXP (x, 0);
3238 rtx op1 = XEXP (x, 1);
3240 if (GET_CODE (x) == PLUS)
3242 if (GET_CODE (op0) == AND
3243 && XEXP (op0, 1) == const1_rtx
3244 && (GET_CODE (op1) == PLUS
3245 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3246 return 1;
3248 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3249 && GET_CODE (op1) == LSHIFTRT
3250 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3251 return 1;
3255 /* On SH1-4 we have only max. SImode operations.
3256 Double the cost for modes > SImode. */
3257 const int cost_scale = !TARGET_SHMEDIA
3258 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD
3259 ? 2 : 1;
3261 /* Adding a register is a single cycle insn. */
3262 if (REG_P (XEXP (x, 1))
3263 || GET_CODE (XEXP (x, 1)) == SUBREG)
3264 return 1 * cost_scale;
3266 /* Likewise for small constants. */
3267 if (CONST_INT_P (XEXP (x, 1))
3268 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3269 return 1 * cost_scale;
3271 if (TARGET_SHMEDIA)
3272 switch (GET_CODE (XEXP (x, 1)))
3274 case CONST:
3275 case LABEL_REF:
3276 case SYMBOL_REF:
3277 return TARGET_SHMEDIA64 ? 5 : 3;
3279 case CONST_INT:
3280 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
3281 return 2;
3282 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
3283 return 3;
3284 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
3285 return 4;
3287 /* Fall through. */
3288 default:
3289 return 5;
3292 /* Any other constant requires a 2 cycle pc-relative load plus an
3293 addition. */
3294 return 3 * cost_scale;
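/* Example costs (illustrative): adding a register or a CONST_OK_FOR_ADD
   immediate to an SImode value costs 1; adding a constant such as 1000,
   which does not fit the add immediate, costs 3 (constant load plus add).
   For modes wider than SImode the costs are doubled by cost_scale.  */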
3297 /* Return the cost of a multiply. */
3298 static inline int
3299 multcosts (rtx x ATTRIBUTE_UNUSED)
3301 if (sh_multcost >= 0)
3302 return sh_multcost;
3303 if (TARGET_SHMEDIA)
3304 /* ??? We have a mul insn, but it has a latency of three, and doesn't
3305 accept constants. Ideally, we would use a cost of one or two and
3306 add the cost of the operand, but disregard the latter when inside loops
3307 and loop invariant code motion is still to follow.
3308 Using a multiply first and splitting it later if it's a loss
3309 doesn't work because of different sign / zero extension semantics
3310 of multiplies vs. shifts. */
3311 return optimize_size ? 2 : 3;
3313 if (TARGET_SH2)
3315 /* We have a mul insn, so we can never take more than the mul and the
3316 read of the mac reg, but count more because of the latency and extra
3317 reg usage. */
3318 if (optimize_size)
3319 return 2;
3320 return 3;
3323 /* If we're aiming at small code, then just count the number of
3324 insns in a multiply call sequence. */
3325 if (optimize_size)
3326 return 5;
3328 /* Otherwise count all the insns in the routine we'd be calling too. */
3329 return 20;
3332 /* Compute a (partial) cost for rtx X. Return true if the complete
3333 cost has been computed, and false if subexpressions should be
3334 scanned. In either case, *TOTAL contains the cost result. */
3335 static bool
3336 sh_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
3337 int *total, bool speed ATTRIBUTE_UNUSED)
3339 switch (code)
3341 /* The lower-subreg pass decides whether to split multi-word regs
3342 into individual regs by looking at the cost for a SET of certain
3343 modes with the following patterns:
3344 (set (reg) (reg))
3345 (set (reg) (const_int 0))
3346 On machines that support vector-move operations a multi-word move
3347 is the same cost as individual reg move. On SH there is no
3348 vector-move, so we have to provide the correct cost in the number
3349 of move insns to load/store the reg of the mode in question. */
3350 case SET:
3351 if (register_operand (SET_DEST (x), VOIDmode)
3352 && (register_operand (SET_SRC (x), VOIDmode)
3353 || satisfies_constraint_Z (SET_SRC (x))))
3355 const enum machine_mode mode = GET_MODE (SET_DEST (x));
3356 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3357 / mov_insn_size (mode, TARGET_SH2A));
3358 return true;
3360 return false;
3362 /* The cost of a mem access is mainly the cost of the address mode. */
3363 case MEM:
3364 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3365 true);
3366 return true;
3368 /* The cost of a sign or zero extend depends on whether the source is a
3369 reg or a mem. In case of a mem take the address into account. */
3370 case SIGN_EXTEND:
3371 if (REG_P (XEXP (x, 0)))
3373 *total = COSTS_N_INSNS (1);
3374 return true;
3376 if (MEM_P (XEXP (x, 0)))
3378 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3379 GET_MODE (XEXP (x, 0)),
3380 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3381 return true;
3383 return false;
3385 case ZERO_EXTEND:
3386 if (REG_P (XEXP (x, 0)))
3388 *total = COSTS_N_INSNS (1);
3389 return true;
3391 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3392 && (GET_MODE (XEXP (x, 0)) == QImode
3393 || GET_MODE (XEXP (x, 0)) == HImode))
3395 /* Handle SH2A's movu.b and movu.w insn. */
3396 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3397 GET_MODE (XEXP (x, 0)),
3398 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3399 return true;
3401 return false;
3403 /* mems for SFmode and DFmode can be inside a parallel due to
3404 the way the fpscr is handled. */
3405 case PARALLEL:
3406 for (int i = 0; i < XVECLEN (x, 0); i++)
3408 rtx xx = XVECEXP (x, 0, i);
3409 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3411 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3412 GET_MODE (XEXP (xx, 0)),
3413 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3414 return true;
3416 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3418 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3419 GET_MODE (XEXP (xx, 1)),
3420 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3421 return true;
3425 if (sh_1el_vec (x, VOIDmode))
3426 *total = outer_code != SET;
3427 else if (sh_rep_vec (x, VOIDmode))
3428 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3429 + (outer_code != SET));
3430 else
3431 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3432 return true;
3434 case CONST_INT:
3435 if (TARGET_SHMEDIA)
3437 if (INTVAL (x) == 0)
3438 *total = 0;
3439 else if (outer_code == AND && and_operand ((x), DImode))
3440 *total = 0;
3441 else if ((outer_code == IOR || outer_code == XOR
3442 || outer_code == PLUS)
3443 && CONST_OK_FOR_I10 (INTVAL (x)))
3444 *total = 0;
3445 else if (CONST_OK_FOR_I16 (INTVAL (x)))
3446 *total = COSTS_N_INSNS (outer_code != SET);
3447 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
3448 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
3449 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
3450 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
3451 else
3452 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
3453 return true;
3455 if (CONST_OK_FOR_I08 (INTVAL (x)))
3456 *total = 0;
3457 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3458 && CONST_OK_FOR_K08 (INTVAL (x)))
3459 *total = 1;
3460 /* prepare_cmp_insn will force costly constants into registers before
3461 the cbranch[sd]i4 patterns can see them, so preserve potentially
3462 interesting ones not covered by I08 above. */
3463 else if (outer_code == COMPARE
3464 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3465 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3466 || INTVAL (x) == 0x7fffffff
3467 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3468 *total = 1;
3469 else
3470 *total = 8;
3471 return true;
3473 case EQ:
3474 /* An and with a constant compared against zero is
3475 most likely going to be a TST #imm, R0 instruction.
3476 Notice that this does not catch the zero_extract variants from
3477 the md file. */
3478 if (GET_CODE (XEXP (x, 0)) == AND
3479 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 0)
3481 *total = 1;
3482 return true;
3484 else
3485 return false;
3487 case SMIN:
3488 case SMAX:
3489 /* This is most likely a clips.b or clips.w insn that is being made up
3490 by combine. */
3491 if (TARGET_SH2A
3492 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3493 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3494 && REG_P (XEXP (XEXP (x, 0), 0))
3495 && CONST_INT_P (XEXP (x, 1)))
3497 *total = COSTS_N_INSNS (1);
3498 return true;
3500 else
3501 return false;
3503 case CONST:
3504 case LABEL_REF:
3505 case SYMBOL_REF:
3506 if (TARGET_SHMEDIA64)
3507 *total = COSTS_N_INSNS (4);
3508 else if (TARGET_SHMEDIA32)
3509 *total = COSTS_N_INSNS (2);
3510 else
3511 *total = 5;
3512 return true;
3514 case CONST_DOUBLE:
3515 if (TARGET_SHMEDIA)
3516 *total = COSTS_N_INSNS (4);
3517 /* prepare_cmp_insn will force costly constants into registers before
3518 the cbranchdi4 pattern can see them, so preserve potentially
3519 interesting ones. */
3520 else if (outer_code == COMPARE && GET_MODE (x) == DImode)
3521 *total = 1;
3522 else
3523 *total = 10;
3524 return true;
3526 case CONST_VECTOR:
3527 /* FIXME: This looks broken. Only the last statement has any effect.
3528 Probably this could be folded with the PARALLEL case? */
3529 if (x == CONST0_RTX (GET_MODE (x)))
3530 *total = 0;
3531 else if (sh_1el_vec (x, VOIDmode))
3532 *total = outer_code != SET;
3533 if (sh_rep_vec (x, VOIDmode))
3534 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3535 + (outer_code != SET));
3536 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3537 return true;
3539 case PLUS:
3540 case MINUS:
3541 *total = COSTS_N_INSNS (addsubcosts (x));
3542 return true;
3544 case AND:
3545 case XOR:
3546 case IOR:
3547 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3548 return true;
3550 case MULT:
3551 *total = COSTS_N_INSNS (multcosts (x));
3552 return true;
3554 case LT:
3555 case GE:
3556 /* div0s sign comparison. */
3557 if (GET_CODE (XEXP (x, 0)) == XOR
3558 && REG_P ((XEXP (XEXP (x, 0), 0)))
3559 && REG_P ((XEXP (XEXP (x, 0), 1)))
3560 && satisfies_constraint_Z (XEXP (x, 1)))
3562 *total = COSTS_N_INSNS (1);
3563 return true;
3565 else
3566 return false;
3568 case LSHIFTRT:
3569 /* div0s sign comparison. */
3570 if (GET_CODE (XEXP (x, 0)) == XOR
3571 && REG_P ((XEXP (XEXP (x, 0), 0)))
3572 && REG_P ((XEXP (XEXP (x, 0), 1)))
3573 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3575 *total = COSTS_N_INSNS (1);
3576 return true;
3578 /* Fall through to shiftcosts. */
3579 case ASHIFT:
3580 case ASHIFTRT:
3582 int cost = shiftcosts (x);
3583 if (cost < 0)
3584 return false;
3585 *total = COSTS_N_INSNS (cost);
3586 return true;
3589 case DIV:
3590 case UDIV:
3591 case MOD:
3592 case UMOD:
3593 *total = COSTS_N_INSNS (20);
3594 return true;
3596 case FLOAT:
3597 case FIX:
3598 *total = 100;
3599 return true;
3601 default:
3602 return false;
3606 /* Determine the size of the fundamental move insn that will be used
3607 for the specified mode. */
3608 static inline int
3609 mov_insn_size (enum machine_mode mode, bool consider_sh2a)
3611 const int mode_sz = GET_MODE_SIZE (mode);
3613 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3614 || (TARGET_FMOVD && mode == DFmode))
3615 return mode_sz;
3616 else
3618 /* The max. available mode for actual move insns is SImode.
3619 Larger accesses will be split into multiple loads/stores. */
3620 const int max_mov_sz = GET_MODE_SIZE (SImode);
3621 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3625 /* Determine the maximum possible displacement for a move insn for the
3626 specified mode. */
3628 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3630 /* The 4 byte displacement move insns are the same as the 2 byte
3631 versions but take a 12 bit displacement. All we need to do is to
3632 scale the max. displacement value accordingly. */
3633 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3635 /* SH2A supports FPU move insns with 12 bit displacements.
3636 Other variants do not support any kind of displacement for
3637 FPU move insns. */
3638 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3639 return 0;
3640 else
3642 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3643 const int mode_sz = GET_MODE_SIZE (mode);
3644 int r = 15 * mov_insn_sz * disp_scale;
3646 /* If the mov insn will be split into multiple loads/stores, the
3647 maximum possible displacement is a bit smaller. */
3648 if (mode_sz > mov_insn_sz)
3649 r -= mode_sz - mov_insn_sz;
3650 return r;
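/* Worked example for the formula above: for SImode without SH2A
   displacements, mov_insn_sz is 4 and disp_scale is 1, giving
   15 * 4 = 60 (a 4 bit displacement scaled by the access size).  With
   consider_sh2a the 12 bit displacement scales this to 4095 * 4 = 16380.
   For DImode the result is reduced by mode_sz - mov_insn_sz = 4,
   giving 56.  */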
3654 /* Determine the alignment mask for a move insn of the
3655 specified mode. */
3656 static inline int
3657 mov_insn_alignment_mask (enum machine_mode mode, bool consider_sh2a)
3659 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3660 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3663 /* Return the displacement value of a displacement address. */
3664 HOST_WIDE_INT
3665 sh_disp_addr_displacement (rtx x)
3667 gcc_assert (satisfies_constraint_Sdd (x));
3668 return INTVAL (XEXP (XEXP (x, 0), 1));
3671 /* Compute the cost of an address. */
3672 static int
3673 sh_address_cost (rtx x, enum machine_mode mode,
3674 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3676 /* 'GBR + 0'. Account one more because of R0 restriction. */
3677 if (REG_P (x) && REGNO (x) == GBR_REG)
3678 return 2;
3680 /* Simple reg, post-inc, pre-dec addressing. */
3681 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3682 return 1;
3684 /* 'reg + disp' addressing. */
3685 if (GET_CODE (x) == PLUS
3686 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3688 /* 'GBR + disp'. Account one more because of R0 restriction. */
3689 if (REGNO (XEXP (x, 0)) == GBR_REG
3690 && gbr_displacement (XEXP (x, 1), mode))
3691 return 2;
3693 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3695 if (offset == 0)
3696 return 1;
3698 /* The displacement would fit into a 2 byte move insn.
3699 HImode and QImode loads/stores with displacement put pressure on
3700 R0 which will most likely require another reg copy. Thus account
3701 a higher cost for that. */
3702 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3703 return (mode == HImode || mode == QImode) ? 2 : 1;
3705 /* The displacement would fit into a 4 byte move insn (SH2A). */
3706 if (TARGET_SH2A
3707 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3708 return 2;
3710 /* The displacement is probably out of range and will require extra
3711 calculations. */
3712 return 3;
3715 /* 'reg + reg' addressing. Account a slightly higher cost because of
3716 increased pressure on R0. */
3717 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))
3718 && ! TARGET_SHMEDIA)
3719 return 3;
3721 /* Not sure what it is - probably expensive. */
3722 return 10;
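/* Examples of the resulting costs on non-SHMEDIA targets: a plain
   register address such as @r4 costs 1; @(8,r4) costs 1 for an SImode
   access but 2 for a QImode or HImode access because of the implied R0
   use; an index address such as @(r0,r4) costs 3.  */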
3725 /* Code to expand a shift. */
3726 static void
3727 gen_ashift (int type, int n, rtx reg)
3729 rtx n_rtx;
3731 /* Negative values here come from the shift_amounts array. */
3732 if (n < 0)
3734 if (type == ASHIFT)
3735 type = LSHIFTRT;
3736 else
3737 type = ASHIFT;
3738 n = -n;
3741 n_rtx = GEN_INT (n);
3742 gcc_assert (satisfies_constraint_P27 (n_rtx));
3744 switch (type)
3746 case ASHIFTRT:
3747 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3748 break;
3749 case LSHIFTRT:
3750 if (n == 1)
3751 emit_insn (gen_shlr (reg, reg));
3752 else
3753 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3754 break;
3755 case ASHIFT:
3756 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3757 break;
3758 default:
3759 gcc_unreachable ();
3763 /* Code to expand a HImode shift. */
3764 static void
3765 gen_ashift_hi (int type, int n, rtx reg)
3767 /* Negative values here come from the shift_amounts array. */
3768 if (n < 0)
3770 if (type == ASHIFT)
3771 type = LSHIFTRT;
3772 else
3773 type = ASHIFT;
3774 n = -n;
3777 switch (type)
3779 case ASHIFTRT:
3780 case LSHIFTRT:
3781 /* We don't have HImode right shift operations because using the
3782 ordinary 32 bit shift instructions for that doesn't generate proper
3783 zero/sign extension.
3784 gen_ashift_hi is only called in contexts where we know that the
3785 sign extension works out correctly. */
3787 int offset = 0;
3788 if (GET_CODE (reg) == SUBREG)
3790 offset = SUBREG_BYTE (reg);
3791 reg = SUBREG_REG (reg);
3793 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3794 break;
3796 case ASHIFT:
3797 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3798 break;
3802 /* Output RTL to split a constant shift into its component SH constant
3803 shift instructions. */
3804 void
3805 gen_shifty_op (int code, rtx *operands)
3807 int value = INTVAL (operands[2]);
3808 int max, i;
3810 /* Truncate the shift count in case it is out of bounds. */
3811 value = value & 31;
3813 if (value == 31)
3815 if (code == LSHIFTRT)
3817 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3818 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3819 return;
3821 else if (code == ASHIFT)
3823 /* There is a two instruction sequence for 31 bit left shifts,
3824 but it requires r0. */
3825 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3827 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3828 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3829 return;
3833 else if (value == 0)
3835 /* This can happen even when optimizing, if there were subregs before
3836 reload. Don't output a nop here, as this is never optimized away;
3837 use a no-op move instead. */
3838 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
3839 return;
3842 max = ashl_lshr_seq[value].insn_count;
3843 for (i = 0; i < max; i++)
3844 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
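/* For instance, assuming the usual contents of ashl_lshr_seq, a
   constant left shift by 10 is emitted as a shift by 8 followed by a
   shift by 2 (shll8 + shll2), while a logical right shift by 31 uses
   the special rotl + movt sequence handled above.  */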
3847 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3848 don't matter. */
3849 void
3850 gen_shifty_hi_op (int code, rtx *operands)
3852 int value = INTVAL (operands[2]);
3853 int max, i;
3854 void (*gen_fun) (int, int, rtx);
3856 /* This operation is used by and_shl for SImode values with a few
3857 high bits known to be cleared. */
3858 value &= 31;
3859 if (value == 0)
3861 emit_insn (gen_nop ());
3862 return;
3865 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3866 if (code == ASHIFT)
3868 max = ext_ashl_lshr_seq[value].insn_count;
3869 for (i = 0; i < max; i++)
3870 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3872 else
3873 /* When shifting right, emit the shifts in reverse order, so that
3874 solitary negative values come first. */
3875 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3876 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3879 /* Output RTL for an arithmetic right shift.
3880 ??? Rewrite to use super-optimizer sequences. */
3881 bool
3882 expand_ashiftrt (rtx *operands)
3884 rtx wrk;
3885 char func[18];
3886 int value;
3888 if (TARGET_DYNSHIFT)
3890 if (!CONST_INT_P (operands[2]))
3892 rtx count = copy_to_mode_reg (SImode, operands[2]);
3893 emit_insn (gen_negsi2 (count, count));
3894 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3895 return true;
3897 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3898 > 1 + SH_DYNAMIC_SHIFT_COST)
3900 rtx count
3901 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3902 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3903 return true;
3906 if (!CONST_INT_P (operands[2]))
3907 return false;
3909 value = INTVAL (operands[2]) & 31;
3911 if (value == 31)
3913 /* If we are called from abs expansion, arrange things so that we
3914 can use a single MT instruction that doesn't clobber the source,
3915 if LICM can hoist out the load of the constant zero. */
3916 if (currently_expanding_to_rtl)
3918 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3919 operands[1]));
3920 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3921 return true;
3923 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3924 return true;
3926 else if (value >= 16 && value <= 19)
3928 wrk = gen_reg_rtx (SImode);
3929 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3930 value -= 16;
3931 while (value--)
3932 gen_ashift (ASHIFTRT, 1, wrk);
3933 emit_move_insn (operands[0], wrk);
3934 return true;
3936 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3937 else if (value <= 5)
3939 wrk = gen_reg_rtx (SImode);
3940 emit_move_insn (wrk, operands[1]);
3941 while (value--)
3942 gen_ashift (ASHIFTRT, 1, wrk);
3943 emit_move_insn (operands[0], wrk);
3944 return true;
3947 wrk = gen_reg_rtx (Pmode);
3949 /* Load the value into an arg reg and call a helper. */
3950 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3951 sprintf (func, "__ashiftrt_r4_%d", value);
3952 function_symbol (wrk, func, SFUNC_STATIC);
3953 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
3954 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3955 return true;
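/* To illustrate the dispatch above on targets without dynamic shifts:
   a constant arithmetic right shift by 3 becomes three single-bit
   shifts, a shift by 17 becomes ashrsi2_16 plus one single-bit shift,
   and a shift by 10 ends up calling the __ashiftrt_r4_10 helper with
   the operand passed in r4.  */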
3958 /* Try to find a good way to implement the combiner pattern
3959 [(set (match_operand:SI 0 "register_operand" "r")
3960 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3961 (match_operand:SI 2 "const_int_operand" "n"))
3962 (match_operand:SI 3 "const_int_operand" "n"))) .
3963 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3964 return 0 for simple right / left or left/right shift combination.
3965 return 1 for a combination of shifts with zero_extend.
3966 return 2 for a combination of shifts with an AND that needs r0.
3967 return 3 for a combination of shifts with an AND that needs an extra
3968 scratch register, when the three highmost bits of the AND mask are clear.
3969 return 4 for a combination of shifts with an AND that needs an extra
3970 scratch register, when any of the three highmost bits of the AND mask
3971 is set.
3972 If ATTRP is set, store an initial right shift width in ATTRP[0],
3973 and the instruction length in ATTRP[1] . These values are not valid
3974 when returning 0.
3975 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3976 shift_amounts for the last shift value that is to be used before the
3977 sign extend. */
3979 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3981 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3982 int left = INTVAL (left_rtx), right;
3983 int best = 0;
3984 int cost, best_cost = 10000;
3985 int best_right = 0, best_len = 0;
3986 int i;
3987 int can_ext;
3989 if (left < 0 || left > 31)
3990 return 0;
3991 if (CONST_INT_P (mask_rtx))
3992 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3993 else
3994 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3995 /* Can this be expressed as a right shift / left shift pair? */
3996 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3997 right = exact_log2 (lsb);
3998 mask2 = ~(mask + lsb - 1);
3999 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
4000 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
4001 if (! mask2)
4002 best_cost = ashl_lshr_seq[right].insn_count
4003 + ashl_lshr_seq[right + left].insn_count;
4004 /* mask has no trailing zeroes <==> ! right */
4005 else if (! right && mask2 == ~(lsb2 - 1))
4007 int late_right = exact_log2 (lsb2);
4008 best_cost = ashl_lshr_seq[left + late_right].insn_count
4009 + ashl_lshr_seq[late_right].insn_count;
4011 /* Try to use zero extend. */
4012 if (mask2 == ~(lsb2 - 1))
4014 int width, first;
4016 for (width = 8; width <= 16; width += 8)
4018 /* Can we zero-extend right away? */
4019 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
4021 cost = 1 + ext_ashl_lshr_seq[right].insn_count
4022 + ext_ashl_lshr_seq[left + right].insn_count;
4023 if (cost < best_cost)
4025 best = 1;
4026 best_cost = cost;
4027 best_right = right;
4028 best_len = cost;
4029 if (attrp)
4030 attrp[2] = -1;
4032 continue;
4034 /* ??? Could try to put zero extend into initial right shift,
4035 or even shift a bit left before the right shift. */
4036 /* Determine value of first part of left shift, to get to the
4037 zero extend cut-off point. */
4038 first = width - exact_log2 (lsb2) + right;
4039 if (first >= 0 && right + left - first >= 0)
4041 cost = ext_ashl_lshr_seq[right].insn_count
4042 + ext_ashl_lshr_seq[first].insn_count + 1
4043 + ext_ashl_lshr_seq[right + left - first].insn_count;
4045 if (cost < best_cost)
4047 best = 1;
4048 best_cost = cost;
4049 best_right = right;
4050 best_len = cost;
4051 if (attrp)
4052 attrp[2] = first;
4057 /* Try to use r0 AND pattern */
4058 for (i = 0; i <= 2; i++)
4060 if (i > right)
4061 break;
4062 if (! CONST_OK_FOR_K08 (mask >> i))
4063 continue;
4064 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
4065 if (cost < best_cost)
4067 best = 2;
4068 best_cost = cost;
4069 best_right = i;
4070 best_len = cost - 1;
4073 /* Try to use a scratch register to hold the AND operand. */
4074 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4075 for (i = 0; i <= 2; i++)
4077 if (i > right)
4078 break;
4079 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4080 + (can_ext
4081 ? ext_ashl_lshr_seq
4082 : ashl_lshr_seq)[left + i].insn_count;
4083 if (cost < best_cost)
4085 best = 4 - can_ext;
4086 best_cost = cost;
4087 best_right = i;
4088 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4092 if (attrp)
4094 attrp[0] = best_right;
4095 attrp[1] = best_len;
4097 return best;
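/* Example, assuming the usual shift sequence costs: for the pattern
   (and (ashift x 2) 0x3fc) the mask shifted right by 2 is 0xff, so the
   zero extension alternative wins and this returns 1; the whole
   combination can then be done as a byte zero extension followed by a
   left shift by 2.  */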
4100 /* This is used in length attributes of the unnamed instructions
4101 corresponding to shl_and_kind return values of 1 and 2. */
4103 shl_and_length (rtx insn)
4105 rtx set_src, left_rtx, mask_rtx;
4106 int attributes[3];
4108 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4109 left_rtx = XEXP (XEXP (set_src, 0), 1);
4110 mask_rtx = XEXP (set_src, 1);
4111 shl_and_kind (left_rtx, mask_rtx, attributes);
4112 return attributes[1];
4115 /* This is used in length attribute of the and_shl_scratch instruction. */
4117 shl_and_scr_length (rtx insn)
4119 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4120 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4121 rtx op = XEXP (set_src, 0);
4122 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4123 op = XEXP (XEXP (op, 0), 0);
4124 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4127 /* Generate rtl for instructions for which shl_and_kind advised a particular
4128 method of generating them, i.e. returned zero. */
4129 bool
4130 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4132 int attributes[3];
4133 unsigned HOST_WIDE_INT mask;
4134 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4135 int right, total_shift;
4136 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4138 right = attributes[0];
4139 total_shift = INTVAL (left_rtx) + right;
4140 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4141 switch (kind)
4143 default:
4144 return true;
4145 case 1:
4147 int first = attributes[2];
4148 rtx operands[3];
4150 if (first < 0)
4152 emit_insn ((mask << right) <= 0xff
4153 ? gen_zero_extendqisi2 (dest,
4154 gen_lowpart (QImode, source))
4155 : gen_zero_extendhisi2 (dest,
4156 gen_lowpart (HImode, source)));
4157 source = dest;
4159 if (source != dest)
4160 emit_insn (gen_movsi (dest, source));
4161 operands[0] = dest;
4162 if (right)
4164 operands[2] = GEN_INT (right);
4165 gen_shifty_hi_op (LSHIFTRT, operands);
4167 if (first > 0)
4169 operands[2] = GEN_INT (first);
4170 gen_shifty_hi_op (ASHIFT, operands);
4171 total_shift -= first;
4172 mask <<= first;
4174 if (first >= 0)
4175 emit_insn (mask <= 0xff
4176 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4177 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4178 if (total_shift > 0)
4180 operands[2] = GEN_INT (total_shift);
4181 gen_shifty_hi_op (ASHIFT, operands);
4183 break;
4185 case 4:
4186 shift_gen_fun = gen_shifty_op;
4187 case 3:
4188 /* If the topmost bit that matters is set, set the topmost bits
4189 that don't matter. This way, we might be able to get a shorter
4190 signed constant. */
4191 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4192 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
4193 case 2:
4194 /* Don't expand fine-grained when combining, because that will
4195 make the pattern fail. */
4196 if (currently_expanding_to_rtl
4197 || reload_in_progress || reload_completed)
4199 rtx operands[3];
4201 /* Cases 3 and 4 should be handled by this split
4202 only while combining */
4203 gcc_assert (kind <= 2);
4204 if (right)
4206 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4207 source = dest;
4209 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4210 if (total_shift)
4212 operands[0] = dest;
4213 operands[1] = dest;
4214 operands[2] = GEN_INT (total_shift);
4215 shift_gen_fun (ASHIFT, operands);
4217 break;
4219 else
4221 int neg = 0;
4222 if (kind != 4 && total_shift < 16)
4224 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4225 if (neg > 0)
4226 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4227 else
4228 neg = 0;
4230 emit_insn (gen_and_shl_scratch (dest, source,
4231 GEN_INT (right),
4232 GEN_INT (mask),
4233 GEN_INT (total_shift + neg),
4234 GEN_INT (neg)));
4235 emit_insn (gen_movsi (dest, dest));
4236 break;
4239 return false;
4242 /* Try to find a good way to implement the combiner pattern
4243 [(set (match_operand:SI 0 "register_operand" "=r")
4244 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4245 (match_operand:SI 2 "const_int_operand" "n")
4246 (match_operand:SI 3 "const_int_operand" "n")
4247 (const_int 0)))
4248 (clobber (reg:SI T_REG))]
4249 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4250 return 0 for simple left / right shift combination.
4251 return 1 for left shift / 8 bit sign extend / left shift.
4252 return 2 for left shift / 16 bit sign extend / left shift.
4253 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4254 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4255 return 5 for left shift / 16 bit sign extend / right shift
4256 return 6 for < 8 bit sign extend / left shift.
4257 return 7 for < 8 bit sign extend / left shift / single right shift.
4258 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4260 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4262 int left, size, insize, ext;
4263 int cost = 0, best_cost;
4264 int kind;
4266 left = INTVAL (left_rtx);
4267 size = INTVAL (size_rtx);
4268 insize = size - left;
4269 gcc_assert (insize > 0);
4270 /* Default to left / right shift. */
4271 kind = 0;
4272 best_cost = ashl_lshr_seq[32 - insize].insn_count
4273 + ashl_lshr_seq[32 - size].insn_count;
4274 if (size <= 16)
4276 /* 16 bit shift / sign extend / 16 bit shift */
4277 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4278 + ashl_lshr_seq[16 - size].insn_count;
4279 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4280 below, by alternative 3 or something even better. */
4281 if (cost < best_cost)
4283 kind = 5;
4284 best_cost = cost;
4287 /* Try a plain sign extend between two shifts. */
4288 for (ext = 16; ext >= insize; ext -= 8)
4290 if (ext <= size)
4292 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4293 + ashl_lshr_seq[size - ext].insn_count;
4294 if (cost < best_cost)
4296 kind = ext / (unsigned) 8;
4297 best_cost = cost;
4300 /* Check if we can do a sloppy shift with a final signed shift
4301 restoring the sign. */
4302 if (EXT_SHIFT_SIGNED (size - ext))
4303 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4304 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4305 /* If not, maybe it's still cheaper to do the second shift sloppy,
4306 and do a final sign extend? */
4307 else if (size <= 16)
4308 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4309 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4310 + 1;
4311 else
4312 continue;
4313 if (cost < best_cost)
4315 kind = ext / (unsigned) 8 + 2;
4316 best_cost = cost;
4319 /* Check if we can sign extend in r0 */
4320 if (insize < 8)
4322 cost = 3 + ashl_lshr_seq[left].insn_count;
4323 if (cost < best_cost)
4325 kind = 6;
4326 best_cost = cost;
4328 /* Try the same with a final signed shift. */
4329 if (left < 31)
4331 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4332 if (cost < best_cost)
4334 kind = 7;
4335 best_cost = cost;
4339 if (TARGET_DYNSHIFT)
4341 /* Try to use a dynamic shift. */
4342 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4343 if (cost < best_cost)
4345 kind = 0;
4346 best_cost = cost;
4349 if (costp)
4350 *costp = cost;
4351 return kind;
4354 /* Function to be used in the length attribute of the instructions
4355 implementing this pattern. */
4357 shl_sext_length (rtx insn)
4359 rtx set_src, left_rtx, size_rtx;
4360 int cost;
4362 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4363 left_rtx = XEXP (XEXP (set_src, 0), 1);
4364 size_rtx = XEXP (set_src, 1);
4365 shl_sext_kind (left_rtx, size_rtx, &cost);
4366 return cost;
4369 /* Generate rtl for this pattern */
4370 bool
4371 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4373 int kind;
4374 int left, size, insize, cost;
4375 rtx operands[3];
4377 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4378 left = INTVAL (left_rtx);
4379 size = INTVAL (size_rtx);
4380 insize = size - left;
4381 switch (kind)
4383 case 1:
4384 case 2:
4385 case 3:
4386 case 4:
4388 int ext = kind & 1 ? 8 : 16;
4389 int shift2 = size - ext;
4391 /* Don't expand fine-grained when combining, because that will
4392 make the pattern fail. */
4393 if (! currently_expanding_to_rtl
4394 && ! reload_in_progress && ! reload_completed)
4396 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4397 emit_insn (gen_movsi (dest, source));
4398 break;
4400 if (dest != source)
4401 emit_insn (gen_movsi (dest, source));
4402 operands[0] = dest;
4403 if (ext - insize)
4405 operands[2] = GEN_INT (ext - insize);
4406 gen_shifty_hi_op (ASHIFT, operands);
4408 emit_insn (kind & 1
4409 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4410 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4411 if (kind <= 2)
4413 if (shift2)
4415 operands[2] = GEN_INT (shift2);
4416 gen_shifty_op (ASHIFT, operands);
4419 else
4421 if (shift2 > 0)
4423 if (EXT_SHIFT_SIGNED (shift2))
4425 operands[2] = GEN_INT (shift2 + 1);
4426 gen_shifty_op (ASHIFT, operands);
4427 operands[2] = const1_rtx;
4428 gen_shifty_op (ASHIFTRT, operands);
4429 break;
4431 operands[2] = GEN_INT (shift2);
4432 gen_shifty_hi_op (ASHIFT, operands);
4434 else if (shift2)
4436 operands[2] = GEN_INT (-shift2);
4437 gen_shifty_hi_op (LSHIFTRT, operands);
4439 emit_insn (size <= 8
4440 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4441 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4443 break;
4445 case 5:
4447 int i = 16 - size;
4448 if (! currently_expanding_to_rtl
4449 && ! reload_in_progress && ! reload_completed)
4450 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4451 else
4453 operands[0] = dest;
4454 operands[2] = GEN_INT (16 - insize);
4455 gen_shifty_hi_op (ASHIFT, operands);
4456 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4458 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4459 while (--i >= 0)
4460 gen_ashift (ASHIFTRT, 1, dest);
4461 break;
4463 case 6:
4464 case 7:
4465 /* Don't expand fine-grained when combining, because that will
4466 make the pattern fail. */
4467 if (! currently_expanding_to_rtl
4468 && ! reload_in_progress && ! reload_completed)
4470 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4471 emit_insn (gen_movsi (dest, source));
4472 break;
4474 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4475 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4476 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
4477 operands[0] = dest;
4478 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4479 gen_shifty_op (ASHIFT, operands);
4480 if (kind == 7)
4481 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4482 break;
4483 default:
4484 return true;
4486 return false;
4489 /* Prefix a symbol_ref name with "datalabel". */
4491 gen_datalabel_ref (rtx sym)
4493 const char *str;
4495 if (GET_CODE (sym) == LABEL_REF)
4496 return gen_rtx_CONST (GET_MODE (sym),
4497 gen_rtx_UNSPEC (GET_MODE (sym),
4498 gen_rtvec (1, sym),
4499 UNSPEC_DATALABEL));
4501 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
4503 str = XSTR (sym, 0);
4504 /* Share all SYMBOL_REF strings with the same value - that is important
4505 for cse. */
4506 str = IDENTIFIER_POINTER (get_identifier (str));
4507 XSTR (sym, 0) = str;
4509 return sym;
4513 static alloc_pool label_ref_list_pool;
4515 typedef struct label_ref_list_d
4517 rtx_code_label *label;
4518 struct label_ref_list_d *next;
4519 } *label_ref_list_t;
4521 /* The SH cannot load a large constant into a register; constants have to
4522 come from a pc-relative load. The reference of a pc-relative load
4523 instruction must be less than 1k in front of the instruction. This
4524 means that we often have to dump a constant inside a function, and
4525 generate code to branch around it.
4527 It is important to minimize this, since the branches will slow things
4528 down and make things bigger.
4530 Worst case code looks like:
4532 mov.l L1,rn
4533 bra L2
4535 align
4536 L1: .long value
4540 mov.l L3,rn
4541 bra L4
4543 align
4544 L3: .long value
4548 We fix this by performing a scan before scheduling, which notices which
4549 instructions need to have their operands fetched from the constant table
4550 and builds the table.
4552 The algorithm is:
4554 scan, find an instruction which needs a pcrel move. Look forward, find the
4555 last barrier which is within MAX_COUNT bytes of the requirement.
4556 If there isn't one, make one. Process all the instructions between
4557 the insn we found and the barrier.
4559 In the above example, we can tell that L3 is within 1k of L1, so
4560 the first move can be shrunk from the 3 insn+constant sequence into
4561 just 1 insn, and the constant moved to L3 to make:
4563 mov.l L1,rn
4565 mov.l L3,rn
4566 bra L4
4568 align
4569 L3:.long value
4570 L4:.long value
4572 Then the second move becomes the target for the shortening process. */
4574 typedef struct
4576 rtx value; /* Value in table. */
4577 rtx_code_label *label; /* Label of value. */
4578 label_ref_list_t wend; /* End of window. */
4579 enum machine_mode mode; /* Mode of value. */
4581 /* True if this constant is accessed as part of a post-increment
4582 sequence. Note that HImode constants are never accessed in this way. */
4583 bool part_of_sequence_p;
4584 } pool_node;
4586 /* The maximum number of constants that can fit into one pool, since
4587 constants in the range 0..510 are at least 2 bytes long, and in the
4588 range from there to 1018 at least 4 bytes. */
4590 #define MAX_POOL_SIZE 372
4591 static pool_node pool_vector[MAX_POOL_SIZE];
4592 static int pool_size;
4593 static rtx_code_label *pool_window_label;
4594 static int pool_window_last;
4596 static int max_labelno_before_reorg;
4598 /* ??? If we need a constant in HImode which is the truncated value of a
4599 constant we need in SImode, we could combine the two entries thus saving
4600 two bytes. Is this common enough to be worth the effort of implementing
4601 it? */
4603 /* ??? This stuff should be done at the same time that we shorten branches.
4604 As it is now, we must assume that all branches are the maximum size, and
4605 this causes us to almost always output constant pools sooner than
4606 necessary. */
4608 /* Add a constant to the pool and return its label. */
4609 static rtx_code_label *
4610 add_constant (rtx x, enum machine_mode mode, rtx last_value)
4612 int i;
4613 rtx_code_label *lab, *new_rtx;
4614 label_ref_list_t ref, newref;
4616 /* First see if we've already got it. */
4617 for (i = 0; i < pool_size; i++)
4619 if (x->code == pool_vector[i].value->code
4620 && mode == pool_vector[i].mode)
4622 if (x->code == CODE_LABEL)
4624 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4625 continue;
4627 if (rtx_equal_p (x, pool_vector[i].value))
4629 lab = new_rtx = 0;
4630 if (! last_value
4631 || ! i
4632 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4634 new_rtx = gen_label_rtx ();
4635 LABEL_REFS (new_rtx) = pool_vector[i].label;
4636 pool_vector[i].label = lab = new_rtx;
4638 if (lab && pool_window_label)
4640 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4641 newref->label = pool_window_label;
4642 ref = pool_vector[pool_window_last].wend;
4643 newref->next = ref;
4644 pool_vector[pool_window_last].wend = newref;
4646 if (new_rtx)
4647 pool_window_label = new_rtx;
4648 pool_window_last = i;
4649 return lab;
4654 /* Need a new one. */
4655 pool_vector[pool_size].value = x;
4656 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4658 lab = 0;
4659 pool_vector[pool_size - 1].part_of_sequence_p = true;
4661 else
4662 lab = gen_label_rtx ();
4663 pool_vector[pool_size].mode = mode;
4664 pool_vector[pool_size].label = lab;
4665 pool_vector[pool_size].wend = NULL;
4666 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4667 if (lab && pool_window_label)
4669 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
4670 newref->label = pool_window_label;
4671 ref = pool_vector[pool_window_last].wend;
4672 newref->next = ref;
4673 pool_vector[pool_window_last].wend = newref;
4675 if (lab)
4676 pool_window_label = lab;
4677 pool_window_last = pool_size;
4678 pool_size++;
4679 return lab;
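/* For example, if two insns in the current pool window need the same
   SImode constant, the second call finds the existing entry in the
   loop above and just attaches an extra label to it instead of adding
   a new pool entry.  */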
4682 /* Output the literal table. START, if nonzero, is the first instruction
4683 this table is needed for, and also indicates that there is at least one
4684 casesi_worker_2 instruction; we have to emit the operand3 labels from
4685 these insns at a 4-byte aligned position. BARRIER is the barrier
4686 after which we are to place the table. */
4687 static void
4688 dump_table (rtx_insn *start, rtx_insn *barrier)
4690 rtx_insn *scan = barrier;
4691 int i;
4692 bool need_align = true;
4693 rtx lab;
4694 label_ref_list_t ref;
4695 bool have_df = false;
4697 /* Do two passes; in the first, dump out the HI sized constants. */
4699 for (i = 0; i < pool_size; i++)
4701 pool_node *p = &pool_vector[i];
4703 if (p->mode == HImode)
4705 if (need_align)
4707 scan = emit_insn_after (gen_align_2 (), scan);
4708 need_align = false;
4710 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4711 scan = emit_label_after (lab, scan);
4712 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4713 scan);
4714 for (ref = p->wend; ref; ref = ref->next)
4716 lab = ref->label;
4717 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4720 else if (p->mode == DFmode)
4721 have_df = true;
4724 need_align = true;
4726 if (start)
4728 scan = emit_insn_after (gen_align_4 (), scan);
4729 need_align = false;
4730 for (; start != barrier; start = NEXT_INSN (start))
4731 if (NONJUMP_INSN_P (start)
4732 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4734 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4735 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4737 scan = emit_label_after (lab, scan);
4740 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4742 rtx_insn *align_insn = NULL;
4744 scan = emit_label_after (gen_label_rtx (), scan);
4745 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4746 need_align = false;
4748 for (i = 0; i < pool_size; i++)
4750 pool_node *p = &pool_vector[i];
4752 switch (p->mode)
4754 case HImode:
4755 break;
4756 case SImode:
4757 case SFmode:
4758 if (align_insn && !p->part_of_sequence_p)
4760 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4761 emit_label_before (lab, align_insn);
4762 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4763 align_insn);
4764 for (ref = p->wend; ref; ref = ref->next)
4766 lab = ref->label;
4767 emit_insn_before (gen_consttable_window_end (lab),
4768 align_insn);
4770 delete_insn (align_insn);
4771 align_insn = NULL;
4772 continue;
4774 else
4776 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4777 scan = emit_label_after (lab, scan);
4778 scan = emit_insn_after (gen_consttable_4 (p->value,
4779 const0_rtx), scan);
4780 need_align = ! need_align;
4782 break;
4783 case DFmode:
4784 if (need_align)
4786 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4787 align_insn = scan;
4788 need_align = false;
4790 case DImode:
4791 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4792 scan = emit_label_after (lab, scan);
4793 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4794 scan);
4795 break;
4796 default:
4797 gcc_unreachable ();
4800 if (p->mode != HImode)
4802 for (ref = p->wend; ref; ref = ref->next)
4804 lab = ref->label;
4805 scan = emit_insn_after (gen_consttable_window_end (lab),
4806 scan);
4811 pool_size = 0;
4814 for (i = 0; i < pool_size; i++)
4816 pool_node *p = &pool_vector[i];
4818 switch (p->mode)
4820 case HImode:
4821 break;
4822 case SImode:
4823 case SFmode:
4824 if (need_align)
4826 need_align = false;
4827 scan = emit_label_after (gen_label_rtx (), scan);
4828 scan = emit_insn_after (gen_align_4 (), scan);
4830 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4831 scan = emit_label_after (lab, scan);
4832 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4833 scan);
4834 break;
4835 case DFmode:
4836 case DImode:
4837 if (need_align)
4839 need_align = false;
4840 scan = emit_label_after (gen_label_rtx (), scan);
4841 scan = emit_insn_after (gen_align_4 (), scan);
4843 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4844 scan = emit_label_after (lab, scan);
4845 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4846 scan);
4847 break;
4848 default:
4849 gcc_unreachable ();
4852 if (p->mode != HImode)
4854 for (ref = p->wend; ref; ref = ref->next)
4856 lab = ref->label;
4857 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4862 scan = emit_insn_after (gen_consttable_end (), scan);
4863 scan = emit_barrier_after (scan);
4864 pool_size = 0;
4865 pool_window_label = NULL;
4866 pool_window_last = 0;
4869 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4871 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4873 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4874 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4875 need to fix it if the input value is CONST_OK_FOR_I08. */
4876 static bool
4877 broken_move (rtx_insn *insn)
4879 if (NONJUMP_INSN_P (insn))
4881 rtx pat = PATTERN (insn);
4882 if (GET_CODE (pat) == PARALLEL)
4883 pat = XVECEXP (pat, 0, 0);
4884 if (GET_CODE (pat) == SET
4885 /* We can load any 8-bit value if we don't care what the high
4886 order bits end up as. */
4887 && GET_MODE (SET_DEST (pat)) != QImode
4888 && (CONSTANT_P (SET_SRC (pat))
4889 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4890 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4891 /* Match mova_const. */
4892 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4893 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4894 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4895 && ! (TARGET_SH2E
4896 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4897 && (fp_zero_operand (SET_SRC (pat))
4898 || fp_one_operand (SET_SRC (pat)))
4899 /* In general we don't know the current setting of fpscr, so
4900 disable fldi.
4901 There is an exception if this was a register-register move
4902 before reload - and hence it was ascertained that we have
4903 single precision setting - and in a post-reload optimization
4904 we changed this to do a constant load. In that case
4905 we don't have an r0 clobber, hence we must use fldi. */
4906 && (TARGET_FMOVD
4907 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4908 == SCRATCH))
4909 && REG_P (SET_DEST (pat))
4910 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4911 && ! (TARGET_SH2A
4912 && GET_MODE (SET_DEST (pat)) == SImode
4913 && (satisfies_constraint_I20 (SET_SRC (pat))
4914 || satisfies_constraint_I28 (SET_SRC (pat))))
4915 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4916 return true;
4919 return false;
4922 /* Return true if the specified insn is a mova insn. */
4923 static bool
4924 mova_p (rtx_insn *insn)
4926 return (NONJUMP_INSN_P (insn)
4927 && GET_CODE (PATTERN (insn)) == SET
4928 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4929 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4930 /* Don't match mova_const. */
4931 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4934 /* Fix up a mova from a switch that went out of range. */
4935 static void
4936 fixup_mova (rtx_insn *mova)
4938 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4939 if (! flag_pic)
4941 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4942 INSN_CODE (mova) = -1;
4944 else
4946 rtx_insn *worker = mova;
4947 rtx_code_label *lab = gen_label_rtx ();
4948 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4952 worker = NEXT_INSN (worker);
4953 gcc_assert (worker
4954 && !LABEL_P (worker)
4955 && !JUMP_P (worker));
4956 } while (NOTE_P (worker)
4957 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4958 wpat = PATTERN (worker);
4959 wpat0 = XVECEXP (wpat, 0, 0);
4960 wpat1 = XVECEXP (wpat, 0, 1);
4961 wsrc = SET_SRC (wpat0);
4962 PATTERN (worker) = (gen_casesi_worker_2
4963 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4964 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4965 XEXP (wpat1, 0)));
4966 INSN_CODE (worker) = -1;
4967 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4968 base = gen_rtx_LABEL_REF (Pmode, lab);
4969 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4970 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4971 INSN_CODE (mova) = -1;
4975 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4976 *num_mova, and check if the new mova is not nested within the first one.
4977 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4978 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4979 static int
4980 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4982 int n_addr = 0; /* Initialization to shut up spurious warning. */
4983 int f_target, n_target = 0; /* Likewise. */
4985 if (optimize)
4987 /* If NEW_MOVA has no address yet, it will be handled later. */
4988 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4989 return -1;
4991 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4992 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4993 if (n_addr > n_target || n_addr + 1022 < n_target)
4995 /* Change the mova into a load.
4996 broken_move will then return true for it. */
4997 fixup_mova (new_mova);
4998 return 1;
5001 if (!(*num_mova)++)
5003 *first_mova = new_mova;
5004 return 2;
5006 if (!optimize
5007 || ((f_target
5008 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
5009 >= n_target))
5010 return -1;
5012 (*num_mova)--;
5013 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
5014 > n_target - n_addr)
5016 fixup_mova (*first_mova);
5017 return 0;
5019 else
5021 fixup_mova (new_mova);
5022 return 1;
5026 /* Find the last barrier from insn FROM which is close enough to hold the
5027 constant pool. If we can't find one, then create one near the end of
5028 the range. */
5029 static rtx_insn *
5030 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
5032 int count_si = 0;
5033 int count_hi = 0;
5034 int found_hi = 0;
5035 int found_si = 0;
5036 int found_di = 0;
5037 int hi_align = 2;
5038 int si_align = 2;
5039 int leading_mova = num_mova;
5040 rtx_insn *barrier_before_mova = NULL;
5041 rtx_insn *found_barrier = NULL;
5042 rtx_insn *good_barrier = NULL;
5043 int si_limit;
5044 int hi_limit;
5045 rtx_insn *orig = from;
5046 rtx_insn *last_got = NULL;
5047 rtx_insn *last_symoff = NULL;
5049 /* For HImode: range is 510, add 4 because pc counts from address of
5050 second instruction after this one, subtract 2 for the jump instruction
5051 that we may need to emit before the table, subtract 2 for the instruction
5052 that fills the jump delay slot (in very rare cases, reorg will take an
5053 instruction from after the constant pool or will leave the delay slot
5054 empty). This gives 510.
5055 For SImode: range is 1020, add 4 because pc counts from address of
5056 second instruction after this one, subtract 2 in case pc is 2 byte
5057 aligned, subtract 2 for the jump instruction that we may need to emit
5058 before the table, subtract 2 for the instruction that fills the jump
5059 delay slot. This gives 1018. */
5061 /* The branch will always be shortened now that the reference address for
5062 forward branches is the successor address, so we no longer need to make
5063 adjustments to the [sh]i_limit for -O0. */
5065 si_limit = 1018;
5066 hi_limit = 510;
5068 while (from && count_si < si_limit && count_hi < hi_limit)
5070 int inc = get_attr_length (from);
5071 int new_align = 1;
5073 /* If this is a label that existed at the time of the compute_alignments
5074 call, determine the alignment. N.B. When find_barrier recurses for
5075 an out-of-reach mova, we might see labels at the start of previously
5076 inserted constant tables. */
5077 if (LABEL_P (from)
5078 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
5080 if (optimize)
5081 new_align = 1 << label_to_alignment (from);
5082 else if (BARRIER_P (prev_nonnote_insn (from)))
5083 new_align = 1 << barrier_align (from);
5084 else
5085 new_align = 1;
5086 inc = 0;
5088 /* In case we are scanning a constant table because of recursion, check
5089 for explicit alignments. If the table is long, we might be forced
5090 to emit the new table in front of it; the length of the alignment
5091 might be the last straw. */
5092 else if (NONJUMP_INSN_P (from)
5093 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5094 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5095 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5096 /* When we find the end of a constant table, paste the new constant
5097 at the end. That is better than putting it in front because
5098 this way, we don't need extra alignment for adding a 4-byte-aligned
5099 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5100 else if (NONJUMP_INSN_P (from)
5101 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5102 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5103 return from;
5105 if (BARRIER_P (from))
5107 rtx_insn *next;
5109 found_barrier = from;
5111 /* If we are at the end of the function, or in front of an alignment
5112 instruction, we need not insert an extra alignment. We prefer
5113 this kind of barrier. */
5114 if (barrier_align (from) > 2)
5115 good_barrier = from;
5117 /* If we are at the end of a hot/cold block, dump the constants
5118 here. */
5119 next = NEXT_INSN (from);
5120 if (next
5121 && NOTE_P (next)
5122 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5123 break;
5126 if (broken_move (from))
5128 rtx pat, src, dst;
5129 enum machine_mode mode;
5131 pat = PATTERN (from);
5132 if (GET_CODE (pat) == PARALLEL)
5133 pat = XVECEXP (pat, 0, 0);
5134 src = SET_SRC (pat);
5135 dst = SET_DEST (pat);
5136 mode = GET_MODE (dst);
5138 /* A GOT pc-relative setting comes in a pair of
5139 mova .L8,r0
5140 mov.l .L8,r12
5141 instructions. (plus add r0,r12).
5142 Remember if we see one without the other. */
5143 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5144 last_got = last_got ? NULL : from;
5145 else if (PIC_ADDR_P (src))
5146 last_got = last_got ? NULL : from;
5148 /* We must explicitly check the mode, because sometimes the
5149 front end will generate code to load unsigned constants into
5150 HImode targets without properly sign extending them. */
5151 if (mode == HImode
5152 || (mode == SImode && satisfies_constraint_I16 (src)
5153 && REGNO (dst) != FPUL_REG))
5155 found_hi += 2;
5156 /* We put the short constants before the long constants, so
5157 we must count the length of short constants in the range
5158 for the long constants. */
5159 /* ??? This isn't optimal, but is easy to do. */
5160 si_limit -= 2;
5162 else
5164 /* We dump DF/DI constants before SF/SI ones, because
5165 the limit is the same, but the alignment requirements
5166 are higher. We may waste up to 4 additional bytes
5167 for alignment, and the DF/DI constant may have
5168 another SF/SI constant placed before it. */
5169 if (TARGET_SHCOMPACT
5170 && ! found_di
5171 && (mode == DFmode || mode == DImode))
5173 found_di = 1;
5174 si_limit -= 8;
5176 while (si_align > 2 && found_si + si_align - 2 > count_si)
5177 si_align >>= 1;
5178 if (found_si > count_si)
5179 count_si = found_si;
5180 found_si += GET_MODE_SIZE (mode);
5181 if (num_mova)
5182 si_limit -= GET_MODE_SIZE (mode);
5186 if (mova_p (from))
5188 switch (untangle_mova (&num_mova, &mova, from))
5190 case 1:
5191 if (flag_pic)
5193 rtx src = SET_SRC (PATTERN (from));
5194 if (GET_CODE (src) == CONST
5195 && GET_CODE (XEXP (src, 0)) == UNSPEC
5196 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5197 last_symoff = from;
5199 break;
5200 case 0: return find_barrier (0, 0, mova);
5201 case 2:
5203 leading_mova = 0;
5204 barrier_before_mova
5205 = good_barrier ? good_barrier : found_barrier;
5207 default: break;
5209 if (found_si > count_si)
5210 count_si = found_si;
5212 else if (JUMP_TABLE_DATA_P (from)
5213 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5215 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5216 || (num_mova
5217 && (prev_nonnote_insn (from)
5218 == XEXP (MOVA_LABELREF (mova), 0))))
5219 num_mova--;
5220 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5222 /* We have just passed the barrier in front of the
5223 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5224 the ADDR_DIFF_VEC is accessed as data, just like our pool
5225 constants, this is a good opportunity to accommodate what
5226 we have gathered so far.
5227 If we waited any longer, we could end up at a barrier in
5228 front of code, which gives worse cache usage for separated
5229 instruction / data caches. */
5230 good_barrier = found_barrier;
5231 break;
5233 else
5235 rtx body = PATTERN (from);
5236 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5239 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5240 else if (JUMP_P (from)
5241 && ! TARGET_SH2
5242 && ! optimize_size)
5243 new_align = 4;
5245 /* There is a possibility that a bf is transformed into a bf/s by the
5246 delay slot scheduler. */
5247 if (JUMP_P (from)
5248 && get_attr_type (from) == TYPE_CBRANCH
5249 && ! sequence_insn_p (from))
5250 inc += 2;
5252 if (found_si)
5254 count_si += inc;
5255 if (new_align > si_align)
5257 si_limit -= (count_si - 1) & (new_align - si_align);
5258 si_align = new_align;
5260 count_si = (count_si + new_align - 1) & -new_align;
5262 if (found_hi)
5264 count_hi += inc;
5265 if (new_align > hi_align)
5267 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5268 hi_align = new_align;
5270 count_hi = (count_hi + new_align - 1) & -new_align;
5272 from = NEXT_INSN (from);
5275 if (num_mova)
5277 if (leading_mova)
5279 /* Try as we might, the leading mova is out of range. Change
5280 it into a load (which will become a pcload) and retry. */
5281 fixup_mova (mova);
5282 return find_barrier (0, 0, mova);
5284 else
5286 /* Insert the constant pool table before the mova instruction,
5287 to prevent the mova label reference from going out of range. */
5288 from = mova;
5289 good_barrier = found_barrier = barrier_before_mova;
5293 if (found_barrier)
5295 if (good_barrier && next_real_insn (found_barrier))
5296 found_barrier = good_barrier;
5298 else
5300 /* We didn't find a barrier in time to dump our stuff,
5301 so we'll make one. */
5302 rtx_code_label *label = gen_label_rtx ();
5304 /* Don't emit a constant table in the middle of insns for
5305 casesi_worker_2. This is a bit overkill but is enough
5306 because casesi_worker_2 does not appear very frequently. */
5307 if (last_symoff)
5308 from = last_symoff;
5310 /* If we exceeded the range, then we must back up over the last
5311 instruction we looked at. Otherwise, we just need to undo the
5312 NEXT_INSN at the end of the loop. */
5313 if (PREV_INSN (from) != orig
5314 && (count_hi > hi_limit || count_si > si_limit))
5315 from = PREV_INSN (PREV_INSN (from));
5316 else
5317 from = PREV_INSN (from);
5319 /* Don't emit a constant table in the middle of global pointer setting,
5320 since that would move the addressing base GOT into another table.
5321 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5322 in the pool anyway, so just move up the whole constant pool.
5324 However, avoid doing so when the last single GOT mov is the starting
5325 insn itself. Going back past the start insn would create a negative
5326 offset, causing errors. */
5327 if (last_got && last_got != orig)
5328 from = PREV_INSN (last_got);
5330 /* Don't insert the constant pool table at the position which
5331 may be the landing pad. */
5332 if (flag_exceptions
5333 && CALL_P (from)
5334 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5335 from = PREV_INSN (from);
5337 /* Walk back to be just before any jump or label.
5338 Putting it before a label reduces the number of times the branch
5339 around the constant pool table will be hit. Putting it before
5340 a jump makes it more likely that the bra delay slot will be
5341 filled. */
5342 while (NOTE_P (from) || JUMP_P (from)
5343 || LABEL_P (from))
5344 from = PREV_INSN (from);
5346 /* Make sure we do not split between a call and its corresponding
5347 CALL_ARG_LOCATION note. */
5348 if (CALL_P (from))
5350 rtx_insn *next = NEXT_INSN (from);
5351 if (next && NOTE_P (next)
5352 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5353 from = next;
5356 from = emit_jump_insn_after (gen_jump (label), from);
5357 JUMP_LABEL (from) = label;
5358 LABEL_NUSES (label) = 1;
5359 found_barrier = emit_barrier_after (from);
5360 emit_label_after (label, found_barrier);
5363 return found_barrier;
5366 /* If the instruction INSN is implemented by a special function, and we can
5367 positively find the register that is used to call the sfunc, and this
5368 register is not used anywhere else in this instruction - except as the
5369 destination of a set, return this register; else, return 0. */
5371 sfunc_uses_reg (rtx insn)
5373 int i;
5374 rtx pattern, part, reg_part, reg;
5376 if (!NONJUMP_INSN_P (insn))
5377 return NULL_RTX;
5378 pattern = PATTERN (insn);
5379 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5380 return NULL_RTX;
5382 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5384 part = XVECEXP (pattern, 0, i);
5385 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5386 reg_part = part;
5388 if (! reg_part)
5389 return NULL_RTX;
5390 reg = XEXP (reg_part, 0);
5391 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5393 part = XVECEXP (pattern, 0, i);
5394 if (part == reg_part || GET_CODE (part) == CLOBBER)
5395 continue;
5396 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5397 && REG_P (SET_DEST (part)))
5398 ? SET_SRC (part) : part)))
5399 return NULL_RTX;
5401 return reg;
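/* Example: for a TYPE_SFUNC parallel whose SImode USE is
   (use (reg:SI 3)), and where r3 is mentioned nowhere else except as
   the destination of a set or inside a clobber, this returns
   (reg:SI 3); in all other cases it returns NULL_RTX.  */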
5404 /* See if the only way in which INSN uses REG is by calling it, or by
5405 setting it while calling it. Set *SET to a SET rtx if the register
5406 is set by INSN. */
5407 static bool
5408 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5410 rtx pattern, reg2;
5412 *set = NULL_RTX;
5414 reg2 = sfunc_uses_reg (insn);
5415 if (reg2 && REGNO (reg2) == REGNO (reg))
5417 pattern = single_set (insn);
5418 if (pattern
5419 && REG_P (SET_DEST (pattern))
5420 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5421 *set = pattern;
5422 return false;
5424 if (!CALL_P (insn))
5426 /* We don't use rtx_equal_p because we don't care if the mode is
5427 different. */
5428 pattern = single_set (insn);
5429 if (pattern
5430 && REG_P (SET_DEST (pattern))
5431 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5433 rtx par, part;
5434 int i;
5436 *set = pattern;
5437 par = PATTERN (insn);
5438 if (GET_CODE (par) == PARALLEL)
5439 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5441 part = XVECEXP (par, 0, i);
5442 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5443 return true;
5445 return reg_mentioned_p (reg, SET_SRC (pattern));
5448 return true;
5451 pattern = PATTERN (insn);
5453 if (GET_CODE (pattern) == PARALLEL)
5455 int i;
5457 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5458 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5459 return true;
5460 pattern = XVECEXP (pattern, 0, 0);
5463 if (GET_CODE (pattern) == SET)
5465 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5467 /* We don't use rtx_equal_p, because we don't care if the
5468 mode is different. */
5469 if (!REG_P (SET_DEST (pattern))
5470 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5471 return true;
5473 *set = pattern;
5476 pattern = SET_SRC (pattern);
5479 if (GET_CODE (pattern) != CALL
5480 || !MEM_P (XEXP (pattern, 0))
5481 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5482 return true;
5484 return false;
5487 /* Given X, a pattern of an insn or a part of it, return a mask of used
5488 general registers. Bits 0..15 mean that the respective registers
5489 are used as inputs in the instruction. Bits 16..31 mean that the
5490 registers 0..15, respectively, are used as outputs, or are clobbered.
5491 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5493 regs_used (rtx x, int is_dest)
5495 enum rtx_code code;
5496 const char *fmt;
5497 int i, used = 0;
5499 if (! x)
5500 return used;
5501 code = GET_CODE (x);
5502 switch (code)
5504 case REG:
5505 if (REGNO (x) < 16)
5506 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5507 << (REGNO (x) + is_dest));
5508 return 0;
5509 case SUBREG:
5511 rtx y = SUBREG_REG (x);
5513 if (!REG_P (y))
5514 break;
5515 if (REGNO (y) < 16)
5516 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5517 << (REGNO (y) +
5518 subreg_regno_offset (REGNO (y),
5519 GET_MODE (y),
5520 SUBREG_BYTE (x),
5521 GET_MODE (x)) + is_dest));
5522 return 0;
5524 case SET:
5525 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5526 case RETURN:
5527 /* If there was a return value, it must have been indicated with USE. */
5528 return 0x00ffff00;
5529 case CLOBBER:
5530 is_dest = 1;
5531 break;
5532 case MEM:
5533 is_dest = 0;
5534 break;
5535 case CALL:
5536 used |= 0x00ff00f0;
5537 break;
5538 default:
5539 break;
5542 fmt = GET_RTX_FORMAT (code);
5544 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5546 if (fmt[i] == 'E')
5548 int j;
5549 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5550 used |= regs_used (XVECEXP (x, i, j), is_dest);
5552 else if (fmt[i] == 'e')
5553 used |= regs_used (XEXP (x, i), is_dest);
5555 return used;
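/* Example: for (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))) the
   inputs r2 and r3 set bits 2 and 3 and the output r1 sets bit 1 + 16,
   so the returned mask is 0x2000c.  */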
5558 /* Create an instruction that prevents redirection of a conditional branch
5559 to the destination of the JUMP with address ADDR.
5560 If the branch needs to be implemented as an indirect jump, try to find
5561 a scratch register for it.
5562 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5563 If any preceding insn that doesn't fit into a delay slot is good enough,
5564 pass 1. Pass 2 if a definite blocking insn is needed.
5565 -1 is used internally to avoid deep recursion.
5566 If a blocking instruction is made or recognized, return it. */
5567 static rtx_insn *
5568 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5570 int dead = 0;
5571 rtx_insn *prev = prev_nonnote_insn (jump);
5572 rtx dest;
5574 /* First, check if we already have an instruction that satisfies our need. */
5575 if (prev && NONJUMP_INSN_P (prev) && ! INSN_DELETED_P (prev))
5577 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5578 return prev;
5579 if (GET_CODE (PATTERN (prev)) == USE
5580 || GET_CODE (PATTERN (prev)) == CLOBBER
5581 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5582 prev = jump;
5583 else if ((need_block &= ~1) < 0)
5584 return prev;
5585 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5586 need_block = 0;
5588 if (GET_CODE (PATTERN (jump)) == RETURN)
5590 if (! need_block)
5591 return prev;
5592 /* Reorg even does nasty things with return insns that cause branches
5593 to go out of range - see find_end_label and callers. */
5594 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5596 /* We can't use JUMP_LABEL here because it might be undefined
5597 when not optimizing. */
5598 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5599 /* If the branch is out of range, try to find a scratch register for it. */
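/* The unsigned comparison below appears to test whether the target lies
   outside roughly [ADDR - 4092, ADDR + 4098], the reach of a pc-relative
   bra (12-bit displacement, scaled by 2, relative to PC + 4).  */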
5600 if (optimize
5601 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5602 > 4092 + 4098))
5604 rtx_insn *scan;
5605 /* Don't look for the stack pointer as a scratch register,
5606 it would cause trouble if an interrupt occurred. */
5607 unsigned attempt = 0x7fff, used;
5608 int jump_left = flag_expensive_optimizations + 1;
5610 /* It is likely that the most recent eligible instruction is wanted for
5611 the delay slot. Therefore, find out which registers it uses, and
5612 try to avoid using them. */
5614 for (scan = jump; (scan = PREV_INSN (scan)); )
5616 enum rtx_code code;
5618 if (INSN_DELETED_P (scan))
5619 continue;
5620 code = GET_CODE (scan);
5621 if (code == CODE_LABEL || code == JUMP_INSN)
5622 break;
5623 if (code == INSN
5624 && GET_CODE (PATTERN (scan)) != USE
5625 && GET_CODE (PATTERN (scan)) != CLOBBER
5626 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5628 attempt &= ~regs_used (PATTERN (scan), 0);
5629 break;
5632 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5633 (scan = NEXT_INSN (scan)); )
5635 enum rtx_code code;
5637 if (INSN_DELETED_P (scan))
5638 continue;
5639 code = GET_CODE (scan);
5640 if (INSN_P (scan))
5642 used |= regs_used (PATTERN (scan), 0);
5643 if (code == CALL_INSN)
5644 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5645 dead |= (used >> 16) & ~used;
5646 if (dead & attempt)
5648 dead &= attempt;
5649 break;
5651 if (code == JUMP_INSN)
5653 if (jump_left-- && simplejump_p (scan))
5654 scan = JUMP_LABEL_AS_INSN (scan);
5655 else
5656 break;
5660 /* Mask out the stack pointer again, in case it was
5661 the only 'free' register we have found. */
5662 dead &= 0x7fff;
5664 /* If the immediate destination is still in range, check for possible
5665 threading with a jump beyond the delay slot insn.
5666 Don't check if we are called recursively; in that case the jump has
5667 been or will be checked in a different invocation. */
5669 else if (optimize && need_block >= 0)
5671 rtx_insn *next = next_active_insn (next_active_insn (dest));
5672 if (next && JUMP_P (next)
5673 && GET_CODE (PATTERN (next)) == SET
5674 && recog_memoized (next) == CODE_FOR_jump_compact)
5676 dest = JUMP_LABEL (next);
5677 if (dest
5678 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5679 > 4092 + 4098))
5680 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5684 if (dead)
5686 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5688 /* It would be nice if we could convert the jump into an indirect
5689 jump / far branch right now, thus exposing all constituent
5690 instructions to further optimization. However, reorg uses
5691 simplejump_p to determine if there is an unconditional jump where
5692 it should try to schedule instructions from the target of the
5693 branch; simplejump_p fails for indirect jumps even if they have
5694 a JUMP_LABEL. */
5695 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5696 (reg, GEN_INT (unspec_bbr_uid++)),
5697 jump);
5698 /* ??? We would like this to have the scope of the jump, but that
5699 scope will change when a delay slot insn of an inner scope is added.
5700 Hence, after delay slot scheduling, we'll have to expect
5701 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5702 the jump. */
5704 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5705 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5706 return insn;
5708 else if (need_block)
5709 /* We can't use JUMP_LABEL here because it might be undefined
5710 when not optimizing. */
5711 return emit_insn_before (gen_block_branch_redirect
5712 (GEN_INT (unspec_bbr_uid++)),
5713 jump);
5714 return prev;
5717 #define CONDJUMP_MIN -252
5718 #define CONDJUMP_MAX 262
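/* These bounds appear to reflect the reach of the SH bt/bf conditional
   branches: an 8-bit signed displacement, scaled by 2 and added to PC + 4,
   i.e. roughly -252 .. +258 bytes from the branch, with a little slack in
   the forward direction for where the distance is measured.  */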
5719 struct far_branch
5721 /* A label (to be placed) in front of the jump
5722 that jumps to our ultimate destination. */
5723 rtx_insn *near_label;
5724 /* Where we are going to insert it if we cannot move the jump any farther,
5725 or the jump itself if we have picked up an existing jump. */
5726 rtx_insn *insert_place;
5727 /* The ultimate destination. */
5728 rtx_insn *far_label;
5729 struct far_branch *prev;
5730 /* If the branch has already been created, its address;
5731 else the address of its first prospective user. */
5732 int address;
5735 static void gen_far_branch (struct far_branch *);
5736 enum mdep_reorg_phase_e mdep_reorg_phase;
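/* A sketch of the transformation done below (not literal compiler output):
   an out-of-range "bt .Lfar" at INSERT_PLACE becomes
       bf   .Lskip     ! condition inverted by invert_jump
   .Lnear:             ! NEAR_LABEL; other branches to .Lfar can retarget here
       bra  .Lfar      ! or a return, or an indirect jump via a scratch reg
   .Lskip:
   so the conditional branch only needs to reach .Lskip.  */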
5737 static void
5738 gen_far_branch (struct far_branch *bp)
5740 rtx_insn *insn = bp->insert_place;
5741 rtx_insn *jump;
5742 rtx_code_label *label = gen_label_rtx ();
5743 int ok;
5745 emit_label_after (label, insn);
5746 if (bp->far_label)
5748 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5749 LABEL_NUSES (bp->far_label)++;
5751 else
5752 jump = emit_jump_insn_after (gen_return (), insn);
5754 /* Emit a barrier so that reorg knows that any following instructions
5755 are not reachable via a fall-through path.
5756 But don't do this when not optimizing, since we wouldn't suppress the
5757 alignment for the barrier then, and could end up with out-of-range
5758 pc-relative loads. */
5759 if (optimize)
5760 emit_barrier_after (jump);
5761 emit_label_after (bp->near_label, insn);
5763 if (bp->far_label)
5764 JUMP_LABEL (jump) = bp->far_label;
5765 else
5767 rtx pat = PATTERN (jump);
5768 gcc_assert (ANY_RETURN_P (pat));
5769 JUMP_LABEL (jump) = pat;
5772 ok = invert_jump (insn, label, 1);
5773 gcc_assert (ok);
5775 /* If we are branching around a jump (rather than a return), prevent
5776 reorg from using an insn from the jump target as the delay slot insn -
5777 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5778 and it could cause branches to go out of range. */
5779 if (bp->far_label)
5780 (emit_insn_after
5781 (gen_stuff_delay_slot
5782 (GEN_INT (unspec_bbr_uid++),
5783 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5784 insn));
5785 /* Prevent reorg from undoing our splits. */
5786 gen_block_redirect (jump, bp->address += 2, 2);
5789 /* Fix up ADDR_DIFF_VECs. */
5790 void
5791 fixup_addr_diff_vecs (rtx_insn *first)
5793 rtx_insn *insn;
5795 for (insn = first; insn; insn = NEXT_INSN (insn))
5797 rtx vec_lab, pat, prevpat, x, braf_label;
5798 rtx_insn *prev;
5800 if (! JUMP_TABLE_DATA_P (insn)
5801 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5802 continue;
5803 pat = PATTERN (insn);
5804 vec_lab = XEXP (XEXP (pat, 0), 0);
5806 /* Search the matching casesi_jump_2. */
5807 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5809 if (!JUMP_P (prev))
5810 continue;
5811 prevpat = PATTERN (prev);
5812 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5813 continue;
5814 x = XVECEXP (prevpat, 0, 1);
5815 if (GET_CODE (x) != USE)
5816 continue;
5817 x = XEXP (x, 0);
5818 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5819 break;
5821 /* FIXME: This is a bug in the optimizer, but it seems harmless
5822 to just avoid panicking. */
5823 if (!prev)
5824 continue;
5826 /* Emit the reference label of the braf where it belongs, right after
5827 the casesi_jump_2 (i.e. braf). */
5828 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5829 emit_label_after (braf_label, prev);
5831 /* Fix up the ADDR_DIFF_VEC to be relative
5832 to the reference address of the braf. */
5833 XEXP (XEXP (pat, 0), 0) = braf_label;
5837 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5838 a barrier. Return the base 2 logarithm of the desired alignment. */
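/* For example, a return value of 2 requests 1 << 2 == 4 byte alignment.  */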
5839 int
5840 barrier_align (rtx_insn *barrier_or_label)
5842 rtx next, pat;
5844 if (! barrier_or_label)
5845 return 0;
5847 if (LABEL_P (barrier_or_label)
5848 && NEXT_INSN (barrier_or_label)
5849 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5850 return 2;
5852 if (BARRIER_P (barrier_or_label)
5853 && PREV_INSN (barrier_or_label)
5854 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5856 pat = PATTERN (PREV_INSN (barrier_or_label));
5857 /* If this is a very small table, we want to keep the alignment after
5858 the table to the minimum for proper code alignment. */
5859 return ((optimize_size
5860 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5861 <= (unsigned) 1 << (CACHE_LOG - 2)))
5862 ? 1 << TARGET_SHMEDIA : align_jumps_log);
5865 next = next_active_insn (barrier_or_label);
5867 if (! next)
5868 return 0;
5870 pat = PATTERN (next);
5872 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5873 /* This is a barrier in front of a constant table. */
5874 return 0;
5876 if (optimize_size)
5877 return 0;
5879 if (! TARGET_SH2 || ! optimize)
5880 return align_jumps_log;
5882 /* When fixing up pcloads, a constant table might be inserted just before
5883 the basic block that ends with the barrier. Thus, we can't trust the
5884 instruction lengths before that. */
5885 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5887 /* Check if there is an immediately preceding branch to the insn beyond
5888 the barrier. We must weigh the cost of discarding useful information
5889 from the current cache line when executing this branch and there is
5890 an alignment, against that of fetching unneeded insns in front of the
5891 branch target when there is no alignment. */
5893 /* There are two delay_slot cases to consider. One is the simple case
5894 where the preceding branch is to the insn beyond the barrier (simple
5895 delay slot filling), and the other is where the preceding branch has
5896 a delay slot that is a duplicate of the insn after the barrier
5897 (fill_eager_delay_slots) and the branch is to the insn after the insn
5898 after the barrier. */
5900 int slot, credit;
5901 bool jump_to_next = false;
5903 /* Skip to the insn before the JUMP_INSN before the barrier under
5904 investigation. */
5905 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5907 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5908 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5909 prev = prev_real_insn (prev))
5911 jump_to_next = false;
5912 if (GET_CODE (PATTERN (prev)) == USE
5913 || GET_CODE (PATTERN (prev)) == CLOBBER)
5914 continue;
5915 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5917 prev = prev_seq->insn (1);
5918 if (INSN_UID (prev) == INSN_UID (next))
5920 /* Delay slot was filled with insn at jump target. */
5921 jump_to_next = true;
5922 continue;
5926 if (slot
5927 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5928 slot = 0;
5929 credit -= get_attr_length (prev);
5931 if (prev && jump_to_label_p (prev))
5933 rtx_insn *x;
5934 if (jump_to_next
5935 || next_real_insn (JUMP_LABEL (prev)) == next
5936 /* If relax_delay_slots() decides NEXT was redundant
5937 with some previous instruction, it will have
5938 redirected PREV's jump to the following insn. */
5939 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5940 /* There is no upper bound on redundant instructions
5941 that might have been skipped, but we must not put an
5942 alignment where none had been before. */
5943 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5944 (INSN_P (x)
5945 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5946 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5947 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5949 rtx pat = PATTERN (prev);
5950 if (GET_CODE (pat) == PARALLEL)
5951 pat = XVECEXP (pat, 0, 0);
5952 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5953 return 0;
5958 return align_jumps_log;
5961 /* If we are inside a phony loop, almost any kind of label can turn up as the
5962 first one in the loop. Aligning a braf label causes incorrect switch
5963 destination addresses; we can detect braf labels because they are
5964 followed by a BARRIER.
5965 Applying loop alignment to small constant or switch tables is a waste
5966 of space, so we suppress this too. */
5967 int
5968 sh_loop_align (rtx_insn *label)
5970 rtx_insn *next = label;
5972 if (! optimize || optimize_size)
5973 return 0;
5975 do
5976 next = next_nonnote_insn (next);
5977 while (next && LABEL_P (next));
5979 if (! next
5980 || ! INSN_P (next)
5981 || recog_memoized (next) == CODE_FOR_consttable_2)
5982 return 0;
5984 return align_loops_log;
5987 /* Do a final pass over the function, just before delayed branch
5988 scheduling. */
5989 static void
5990 sh_reorg (void)
5992 rtx_insn *first, *insn, *mova = NULL;
5993 int num_mova;
5994 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5995 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5997 first = get_insns ();
5998 max_labelno_before_reorg = max_label_num ();
6000 /* We must split call insns before introducing `mova's. If we're
6001 optimizing, they'll have already been split. Otherwise, make
6002 sure we don't split them too late. */
6003 if (! optimize)
6004 split_all_insns_noflow ();
6006 if (TARGET_SHMEDIA)
6007 return;
6009 /* If relaxing, generate pseudo-ops to associate function calls with
6010 the symbols they call. It does no harm to not generate these
6011 pseudo-ops. However, when we can generate them, it enables the
6012 linker to potentially relax the jsr to a bsr, and eliminate the
6013 register load and, possibly, the constant pool entry. */
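/* A sketch of the intended assembly (assumed, not verified output):
       mov.l  .Lconst,r1   ! "Lnnn:" is emitted here by final_prescan_insn
       ...
       jsr    @r1          ! annotated with ".uses Lnnn"
   which gives the linker enough information to turn the jsr into a bsr
   and drop the register load and the constant pool entry.  */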
6015 mdep_reorg_phase = SH_INSERT_USES_LABELS;
6016 if (TARGET_RELAX)
6018 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
6019 own purposes. This works because none of the remaining passes
6020 need to look at them.
6022 ??? But it may break in the future. We should use a machine
6023 dependent REG_NOTE, or some other approach entirely. */
6024 for (insn = first; insn; insn = NEXT_INSN (insn))
6026 if (INSN_P (insn))
6028 rtx note;
6030 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
6031 NULL_RTX)) != 0)
6032 remove_note (insn, note);
6036 for (insn = first; insn; insn = NEXT_INSN (insn))
6038 rtx pattern, reg, set, dies;
6039 rtx_code_label *label;
6040 rtx_insn *link, *scan;
6041 int rescan = 0, foundinsn = 0;
6043 if (CALL_P (insn))
6045 pattern = PATTERN (insn);
6047 if (GET_CODE (pattern) == PARALLEL)
6048 pattern = XVECEXP (pattern, 0, 0);
6049 if (GET_CODE (pattern) == SET)
6050 pattern = SET_SRC (pattern);
6052 if (GET_CODE (pattern) != CALL
6053 || !MEM_P (XEXP (pattern, 0)))
6054 continue;
6056 reg = XEXP (XEXP (pattern, 0), 0);
6058 else
6060 reg = sfunc_uses_reg (insn);
6061 if (! reg)
6062 continue;
6065 if (!REG_P (reg))
6066 continue;
6068 /* Try scanning backward to find where the register is set. */
6069 link = NULL;
6070 for (scan = PREV_INSN (insn);
6071 scan && !LABEL_P (scan);
6072 scan = PREV_INSN (scan))
6074 if (! INSN_P (scan))
6075 continue;
6077 if (! reg_mentioned_p (reg, scan))
6078 continue;
6080 if (noncall_uses_reg (reg, scan, &set))
6081 break;
6083 if (set)
6085 link = scan;
6086 break;
6090 if (! link)
6091 continue;
6093 /* The register is set at LINK. */
6095 /* We can only optimize the function call if the register is
6096 being set to a symbol. In theory, we could sometimes
6097 optimize calls to a constant location, but the assembler
6098 and linker do not support that at present. */
6099 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
6100 && GET_CODE (SET_SRC (set)) != LABEL_REF)
6101 continue;
6103 /* Scan forward from LINK to the place where REG dies, and
6104 make sure that the only insns which use REG are
6105 themselves function calls. */
6107 /* ??? This doesn't work for call targets that were allocated
6108 by reload, since there may not be a REG_DEAD note for the
6109 register. */
6111 dies = NULL_RTX;
6112 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6114 rtx scanset;
6116 /* Don't try to trace forward past a CODE_LABEL if we haven't
6117 seen INSN yet. Ordinarily, we will only find the setting insn
6118 if it is in the same basic block. However,
6119 cross-jumping can insert code labels in between the load and
6120 the call, and can result in situations where a single call
6121 insn may have two targets depending on where we came from. */
6123 if (LABEL_P (scan) && ! foundinsn)
6124 break;
6126 if (! INSN_P (scan))
6127 continue;
6129 /* Don't try to trace forward past a JUMP. To optimize
6130 safely, we would have to check that all the
6131 instructions at the jump destination did not use REG. */
6133 if (JUMP_P (scan))
6134 break;
6136 if (! reg_mentioned_p (reg, scan))
6137 continue;
6139 if (noncall_uses_reg (reg, scan, &scanset))
6140 break;
6142 if (scan == insn)
6143 foundinsn = 1;
6145 if (scan != insn
6146 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6148 /* There is a function call to this register other
6149 than the one we are checking. If we optimize
6150 this call, we need to rescan again below. */
6151 rescan = 1;
6154 /* ??? We shouldn't have to worry about SCANSET here.
6155 We should just be able to check for a REG_DEAD note
6156 on a function call. However, the REG_DEAD notes are
6157 apparently not dependable around libcalls; c-torture
6158 execute/920501-2 is a test case. If SCANSET is set,
6159 then this insn sets the register, so it must have
6160 died earlier. Unfortunately, this will only handle
6161 the cases in which the register is, in fact, set in a
6162 later insn. */
6164 /* ??? We shouldn't have to use FOUNDINSN here.
6165 This dates back to when we used LOG_LINKS to find
6166 the most recent insn which sets the register. */
6168 if (foundinsn
6169 && (scanset
6170 || find_reg_note (scan, REG_DEAD, reg)))
6172 dies = scan;
6173 break;
6177 if (! dies)
6179 /* Either there was a branch, or some insn used REG
6180 other than as a function call address. */
6181 continue;
6184 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6185 on the insn which sets the register, and on each call insn
6186 which uses the register. In final_prescan_insn we look for
6187 the REG_LABEL_OPERAND notes, and output the appropriate label
6188 or pseudo-op. */
6190 label = gen_label_rtx ();
6191 add_reg_note (link, REG_LABEL_OPERAND, label);
6192 add_reg_note (insn, REG_LABEL_OPERAND, label);
6193 if (rescan)
6195 scan = link;
6196 do
6198 rtx reg2;
6200 scan = NEXT_INSN (scan);
6201 if (scan != insn
6202 && ((CALL_P (scan)
6203 && reg_mentioned_p (reg, scan))
6204 || ((reg2 = sfunc_uses_reg (scan))
6205 && REGNO (reg2) == REGNO (reg))))
6206 add_reg_note (scan, REG_LABEL_OPERAND, label);
6208 while (scan != dies);
6213 if (TARGET_SH2)
6214 fixup_addr_diff_vecs (first);
6216 if (optimize)
6218 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6219 shorten_branches (first);
6222 /* Scan the function looking for move instructions which have to be
6223 changed to pc-relative loads and insert the literal tables. */
6224 label_ref_list_pool = create_alloc_pool ("label references list",
6225 sizeof (struct label_ref_list_d),
6226 30);
6227 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6228 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6230 if (mova_p (insn))
6232 /* ??? basic block reordering can move a switch table dispatch
6233 below the switch table. Check if that has happened.
6234 We only have the addresses available when optimizing; but then,
6235 this check shouldn't be needed when not optimizing. */
6236 if (!untangle_mova (&num_mova, &mova, insn))
6238 insn = mova;
6239 num_mova = 0;
6242 else if (JUMP_TABLE_DATA_P (insn)
6243 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6244 && num_mova
6245 /* ??? loop invariant motion can also move a mova out of a
6246 loop. Since loop does this code motion anyway, maybe we
6247 should wrap UNSPEC_MOVA into a CONST, so that reload can
6248 move it back. */
6249 && ((num_mova > 1
6250 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6251 || (prev_nonnote_insn (insn)
6252 == XEXP (MOVA_LABELREF (mova), 0))))
6254 rtx_insn *scan;
6255 int total;
6257 num_mova--;
6259 /* Some code might have been inserted between the mova and
6260 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6261 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6262 total += get_attr_length (scan);
6264 /* The range of mova is 1020; add 4 because the pc counts from the
6265 address of the second instruction after this one, and subtract 2 in
6266 case the pc is 2-byte aligned. Possible alignment needed for the
6267 ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
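/* Hence the limit below: 1020 + 4 - 2 == 1022.  */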
6268 if (total > 1022)
6270 /* Change the mova into a load, and restart scanning
6271 there. broken_move will then return true for mova. */
6272 fixup_mova (mova);
6273 insn = mova;
6276 if (broken_move (insn)
6277 || (NONJUMP_INSN_P (insn)
6278 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6280 rtx_insn *scan;
6281 /* Scan ahead looking for a barrier to stick the constant table
6282 behind. */
6283 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6284 rtx_insn *last_float_move = NULL;
6285 rtx last_float = 0, *last_float_addr = NULL;
6286 int need_aligned_label = 0;
6288 if (num_mova && ! mova_p (mova))
6290 /* find_barrier had to change the first mova into a
6291 pcload; thus, we have to start with this new pcload. */
6292 insn = mova;
6293 num_mova = 0;
6295 /* Now find all the moves between the points and modify them. */
6296 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6298 if (LABEL_P (scan))
6299 last_float = 0;
6300 if (NONJUMP_INSN_P (scan)
6301 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6302 need_aligned_label = 1;
6303 if (broken_move (scan))
6305 rtx *patp = &PATTERN (scan), pat = *patp;
6306 rtx src, dst;
6307 rtx lab;
6308 rtx newsrc;
6309 enum machine_mode mode;
6311 if (GET_CODE (pat) == PARALLEL)
6312 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6313 src = SET_SRC (pat);
6314 dst = SET_DEST (pat);
6315 mode = GET_MODE (dst);
6317 if (mode == SImode && satisfies_constraint_I16 (src)
6318 && REGNO (dst) != FPUL_REG)
6320 int offset = 0;
6322 mode = HImode;
6323 while (GET_CODE (dst) == SUBREG)
6325 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6326 GET_MODE (SUBREG_REG (dst)),
6327 SUBREG_BYTE (dst),
6328 GET_MODE (dst));
6329 dst = SUBREG_REG (dst);
6331 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6333 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6335 /* This must be an insn that clobbers r0. */
6336 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6337 XVECLEN (PATTERN (scan), 0)
6338 - 1);
6339 rtx clobber = *clobberp;
6341 gcc_assert (GET_CODE (clobber) == CLOBBER
6342 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6344 if (last_float
6345 && reg_set_between_p (r0_rtx, last_float_move, scan))
6346 last_float = 0;
6347 if (last_float
6348 && TARGET_SHCOMPACT
6349 && GET_MODE_SIZE (mode) != 4
6350 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
6351 last_float = 0;
6352 lab = add_constant (src, mode, last_float);
6353 if (lab)
6354 emit_insn_before (gen_mova (lab), scan);
6355 else
6357 /* There will be a REG_UNUSED note for r0 on
6358 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6359 else reorg:mark_target_live_regs will not
6360 consider r0 to be used, and we end up with a delay
6361 slot insn in front of SCAN that clobbers r0. */
6362 rtx note
6363 = find_regno_note (last_float_move, REG_UNUSED, 0);
6365 /* If we are not optimizing, then there may not be
6366 a note. */
6367 if (note)
6368 PUT_REG_NOTE_KIND (note, REG_INC);
6370 *last_float_addr = r0_inc_rtx;
6372 last_float_move = scan;
6373 last_float = src;
6374 newsrc = gen_const_mem (mode,
6375 (((TARGET_SH4 && ! TARGET_FMOVD)
6376 || REGNO (dst) == FPUL_REG)
6377 ? r0_inc_rtx
6378 : r0_rtx));
6379 last_float_addr = &XEXP (newsrc, 0);
6381 /* Remove the clobber of r0. */
6382 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6383 gen_rtx_SCRATCH (Pmode));
6385 /* This is a mova needing a label. Create it. */
6386 else if (GET_CODE (src) == UNSPEC
6387 && XINT (src, 1) == UNSPEC_MOVA
6388 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6390 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6391 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6392 newsrc = gen_rtx_UNSPEC (SImode,
6393 gen_rtvec (1, newsrc),
6394 UNSPEC_MOVA);
6396 else if (GET_CODE (src) == UNSPEC_VOLATILE
6397 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6399 newsrc = XVECEXP (src, 0, 0);
6400 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6401 INSN_CODE (scan) = -1;
6402 continue;
6404 else
6406 lab = add_constant (src, mode, 0);
6407 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6408 newsrc = gen_const_mem (mode, newsrc);
6410 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
6411 INSN_CODE (scan) = -1;
6414 dump_table (need_aligned_label ? insn : 0, barrier);
6415 insn = barrier;
6418 free_alloc_pool (label_ref_list_pool);
6419 for (insn = first; insn; insn = NEXT_INSN (insn))
6420 PUT_MODE (insn, VOIDmode);
6422 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6423 INSN_ADDRESSES_FREE ();
6424 split_branches (first);
6426 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6427 also has an effect on the register that holds the address of the sfunc.
6428 Insert an extra dummy insn in front of each sfunc that pretends to
6429 use this register. */
6430 if (flag_delayed_branch)
6432 for (insn = first; insn; insn = NEXT_INSN (insn))
6434 rtx reg = sfunc_uses_reg (insn);
6436 if (! reg)
6437 continue;
6438 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6441 #if 0
6442 /* fpscr is not actually a user variable, but we pretend it is for the
6443 sake of the previous optimization passes, since we want it handled like
6444 one. However, we don't have any debugging information for it, so turn
6445 it into a non-user variable now. */
6446 if (TARGET_SH4)
6447 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
6448 #endif
6449 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6452 /* Return the UID of the insn that follows the specified label. */
6453 static int
6454 get_dest_uid (rtx label, int max_uid)
6456 rtx_insn *dest = next_real_insn (label);
6457 int dest_uid;
6458 if (! dest)
6459 /* This can happen for an undefined label. */
6460 return 0;
6461 dest_uid = INSN_UID (dest);
6462 /* If this is a newly created branch redirection blocking instruction,
6463 we cannot index the branch_uid or insn_addresses arrays with its
6464 uid. But then, we won't need to, because the actual destination is
6465 the following branch. */
6466 while (dest_uid >= max_uid)
6468 dest = NEXT_INSN (dest);
6469 dest_uid = INSN_UID (dest);
6471 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6472 return 0;
6473 return dest_uid;
6476 /* Split condbranches that are out of range. Also add clobbers for
6477 scratch registers that are needed in far jumps.
6478 We do this before delay slot scheduling, so that it can take our
6479 newly created instructions into account. It also allows us to
6480 find branches with common targets more easily. */
6481 static void
6482 split_branches (rtx_insn *first)
6484 rtx_insn *insn;
6485 struct far_branch **uid_branch, *far_branch_list = 0;
6486 int max_uid = get_max_uid ();
6487 int ok;
6489 /* Find out which branches are out of range. */
6490 shorten_branches (first);
6492 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6493 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6495 for (insn = first; insn; insn = NEXT_INSN (insn))
6496 if (! INSN_P (insn))
6497 continue;
6498 else if (INSN_DELETED_P (insn))
6500 /* Shorten_branches would split this instruction again,
6501 so transform it into a note. */
6502 SET_INSN_DELETED (insn);
6504 else if (JUMP_P (insn))
6506 enum attr_type type = get_attr_type (insn);
6507 if (type == TYPE_CBRANCH)
6509 rtx_insn *next, *beyond;
6511 if (get_attr_length (insn) > 4)
6513 rtx src = SET_SRC (PATTERN (insn));
6514 rtx olabel = XEXP (XEXP (src, 1), 0);
6515 int addr = INSN_ADDRESSES (INSN_UID (insn));
6516 rtx_insn *label = 0;
6517 int dest_uid = get_dest_uid (olabel, max_uid);
6518 struct far_branch *bp = uid_branch[dest_uid];
6520 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6521 the label if the LABEL_NUSES count drops to zero. There is
6522 always a jump_optimize pass that sets these values, but it
6523 proceeds to delete unreferenced code, and then if not
6524 optimizing, to un-delete the deleted instructions, thus
6525 leaving labels with too low uses counts. */
6526 if (! optimize)
6528 JUMP_LABEL (insn) = olabel;
6529 LABEL_NUSES (olabel)++;
6531 if (! bp)
6533 bp = (struct far_branch *) alloca (sizeof *bp);
6534 uid_branch[dest_uid] = bp;
6535 bp->prev = far_branch_list;
6536 far_branch_list = bp;
6537 bp->far_label = as_a <rtx_insn *> (
6538 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6539 0));
6540 LABEL_NUSES (bp->far_label)++;
6542 else
6544 label = bp->near_label;
6545 if (! label && bp->address - addr >= CONDJUMP_MIN)
6547 rtx_insn *block = bp->insert_place;
6549 if (GET_CODE (PATTERN (block)) == RETURN)
6550 block = PREV_INSN (block);
6551 else
6552 block = gen_block_redirect (block,
6553 bp->address, 2);
6554 label = emit_label_after (gen_label_rtx (),
6555 PREV_INSN (block));
6556 bp->near_label = label;
6558 else if (label && ! NEXT_INSN (label))
6560 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6561 bp->insert_place = insn;
6562 else
6563 gen_far_branch (bp);
6566 if (! label
6567 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6569 bp->near_label = label = gen_label_rtx ();
6570 bp->insert_place = insn;
6571 bp->address = addr;
6573 ok = redirect_jump (insn, label, 0);
6574 gcc_assert (ok);
6576 else
6578 /* get_attr_length (insn) == 2 */
6579 /* Check if we have a pattern where reorg wants to redirect
6580 the branch to a label from an unconditional branch that
6581 is too far away. */
6582 /* We can't use JUMP_LABEL here because it might be undefined
6583 when not optimizing. */
6584 /* A syntax error might cause beyond to be NULL_RTX. */
6585 beyond
6586 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6587 0));
6589 if (beyond
6590 && (JUMP_P (beyond)
6591 || ((beyond = next_active_insn (beyond))
6592 && JUMP_P (beyond)))
6593 && GET_CODE (PATTERN (beyond)) == SET
6594 && recog_memoized (beyond) == CODE_FOR_jump_compact
6595 && ((INSN_ADDRESSES
6596 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6597 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6598 > 252 + 258 + 2))
6599 gen_block_redirect (beyond,
6600 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6603 next = next_active_insn (insn);
6605 if (next
6606 && (JUMP_P (next)
6607 || ((next = next_active_insn (next))
6608 && JUMP_P (next)))
6609 && GET_CODE (PATTERN (next)) == SET
6610 && recog_memoized (next) == CODE_FOR_jump_compact
6611 && ((INSN_ADDRESSES
6612 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6613 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6614 > 252 + 258 + 2))
6615 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6617 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6619 int addr = INSN_ADDRESSES (INSN_UID (insn));
6620 rtx_insn *far_label = 0;
6621 int dest_uid = 0;
6622 struct far_branch *bp;
6624 if (type == TYPE_JUMP)
6626 far_label = as_a <rtx_insn *> (
6627 XEXP (SET_SRC (PATTERN (insn)), 0));
6628 dest_uid = get_dest_uid (far_label, max_uid);
6629 if (! dest_uid)
6631 /* Parse errors can lead to labels outside
6632 the insn stream. */
6633 if (! NEXT_INSN (far_label))
6634 continue;
6636 if (! optimize)
6638 JUMP_LABEL (insn) = far_label;
6639 LABEL_NUSES (far_label)++;
6641 redirect_jump (insn, ret_rtx, 1);
6642 far_label = 0;
6645 bp = uid_branch[dest_uid];
6646 if (! bp)
6648 bp = (struct far_branch *) alloca (sizeof *bp);
6649 uid_branch[dest_uid] = bp;
6650 bp->prev = far_branch_list;
6651 far_branch_list = bp;
6652 bp->near_label = 0;
6653 bp->far_label = far_label;
6654 if (far_label)
6655 LABEL_NUSES (far_label)++;
6657 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6658 if (addr - bp->address <= CONDJUMP_MAX)
6659 emit_label_after (bp->near_label, PREV_INSN (insn));
6660 else
6662 gen_far_branch (bp);
6663 bp->near_label = 0;
6665 else
6666 bp->near_label = 0;
6667 bp->address = addr;
6668 bp->insert_place = insn;
6669 if (! far_label)
6670 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6671 else
6672 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6675 /* Generate all pending far branches,
6676 and free our references to the far labels. */
6677 while (far_branch_list)
6679 if (far_branch_list->near_label
6680 && ! NEXT_INSN (far_branch_list->near_label))
6681 gen_far_branch (far_branch_list);
6682 if (optimize
6683 && far_branch_list->far_label
6684 && ! --LABEL_NUSES (far_branch_list->far_label))
6685 delete_insn (far_branch_list->far_label);
6686 far_branch_list = far_branch_list->prev;
6689 /* Instruction length information is no longer valid due to the new
6690 instructions that have been generated. */
6691 init_insn_lengths ();
6694 /* Dump out instruction addresses, which is useful for debugging the
6695 constant pool table stuff.
6697 If relaxing, output the label and pseudo-ops used to link together
6698 calls and the instruction which set the registers.
6700 ??? The addresses printed by this routine for insns are nonsense for
6701 insns which are inside of a sequence where none of the inner insns have
6702 variable length. This is because the second pass of shorten_branches
6703 does not bother to update them. */
6704 void
6705 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6706 int noperands ATTRIBUTE_UNUSED)
6708 if (TARGET_DUMPISIZE)
6709 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6711 if (TARGET_RELAX)
6713 rtx note;
6715 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
6716 if (note)
6718 rtx pattern;
6720 pattern = PATTERN (insn);
6721 if (GET_CODE (pattern) == PARALLEL)
6722 pattern = XVECEXP (pattern, 0, 0);
6723 switch (GET_CODE (pattern))
6725 case SET:
6726 if (GET_CODE (SET_SRC (pattern)) != CALL
6727 && get_attr_type (insn) != TYPE_SFUNC)
6729 targetm.asm_out.internal_label
6730 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6731 break;
6733 /* else FALLTHROUGH */
6734 case CALL:
6735 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6736 CODE_LABEL_NUMBER (XEXP (note, 0)));
6737 break;
6739 default:
6740 gcc_unreachable ();
6746 /* Dump out any constants accumulated in the final pass. These will
6747 only be labels. */
6748 const char *
6749 output_jump_label_table (void)
6751 int i;
6753 if (pool_size)
6755 fprintf (asm_out_file, "\t.align 2\n");
6756 for (i = 0; i < pool_size; i++)
6758 pool_node *p = &pool_vector[i];
6760 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6761 CODE_LABEL_NUMBER (p->label));
6762 output_asm_insn (".long %O0", &p->value);
6764 pool_size = 0;
6767 return "";
6770 /* A full frame looks like:
6772 arg-5
6773 arg-4
6774 [ if current_function_anonymous_args
6775 arg-3
6776 arg-2
6777 arg-1
6778 arg-0 ]
6779 saved-fp
6780 saved-r10
6781 saved-r11
6782 saved-r12
6783 saved-pr
6784 local-n
6786 local-1
6787 local-0 <- fp points here.
6789 Number of bytes pushed for anonymous args, used to pass information
6790 between expand_prologue and expand_epilogue.
6792 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6793 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6794 for an epilogue and a negative value means that it's for a sibcall
6795 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6796 all the registers that are about to be restored, and hence dead. */
6797 static void
6798 output_stack_adjust (int size, rtx reg, int epilogue_p,
6799 HARD_REG_SET *live_regs_mask, bool frame_p)
6801 rtx_insn *(*emit_fn) (rtx) = frame_p ? &frame_insn : &emit_insn;
6802 if (size)
6804 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6806 /* This test is bogus, as output_stack_adjust is used to re-align the
6807 stack. */
6808 #if 0
6809 gcc_assert (!(size % align));
6810 #endif
6812 if (CONST_OK_FOR_ADD (size))
6813 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6814 /* Try to do it with two partial adjustments; however, we must make
6815 sure that the stack is properly aligned at all times, in case
6816 an interrupt occurs between the two partial adjustments. */
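/* For example (a sketch, assuming a 4-byte stack boundary and the SH
   +/-127 add-immediate range): SIZE == 160 does not fit in one addition,
   but 160 / 2 & -4 == 80 and 160 - 80 == 80 both do, and the stack stays
   4-byte aligned after each partial adjustment.  */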
6817 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6818 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6820 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6821 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6823 else
6825 rtx const_reg;
6826 rtx insn;
6827 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
6828 int i;
6830 /* If TEMP is invalid, we could temporarily save a general
6831 register to MACL. However, there is currently no need
6832 to handle this case, so just die when we see it. */
6833 if (epilogue_p < 0
6834 || current_function_interrupt
6835 || ! call_really_used_regs[temp] || fixed_regs[temp])
6836 temp = -1;
6837 if (temp < 0 && ! current_function_interrupt
6838 && (TARGET_SHMEDIA || epilogue_p >= 0))
6840 HARD_REG_SET temps;
6841 COPY_HARD_REG_SET (temps, call_used_reg_set);
6842 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6843 if (epilogue_p > 0)
6845 int nreg = 0;
6846 if (crtl->return_rtx)
6848 enum machine_mode mode;
6849 mode = GET_MODE (crtl->return_rtx);
6850 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6851 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6853 for (i = 0; i < nreg; i++)
6854 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6855 if (crtl->calls_eh_return)
6857 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6858 for (i = 0; i <= 3; i++)
6859 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6862 if (TARGET_SHMEDIA && epilogue_p < 0)
6863 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
6864 CLEAR_HARD_REG_BIT (temps, i);
6865 if (epilogue_p <= 0)
6867 for (i = FIRST_PARM_REG;
6868 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6869 CLEAR_HARD_REG_BIT (temps, i);
6870 if (cfun->static_chain_decl != NULL)
6871 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6873 temp = scavenge_reg (&temps);
6875 if (temp < 0 && live_regs_mask)
6877 HARD_REG_SET temps;
6879 COPY_HARD_REG_SET (temps, *live_regs_mask);
6880 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6881 temp = scavenge_reg (&temps);
6883 if (temp < 0)
6885 rtx adj_reg, tmp_reg, mem;
6887 /* If we reached here, the most likely case is the (sibcall)
6888 epilogue for non-SHmedia. Put a special push/pop sequence
6889 for such cases as a last resort. This looks lengthy, but
6890 it should not be a problem because it seems to be very
6891 rare. */
6893 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
6896 /* ??? There is still the slight possibility that r4 or
6897 r5 have been reserved as fixed registers or assigned
6898 as global registers, and they change during an
6899 interrupt. There are possible ways to handle this:
6901 - If we are adjusting the frame pointer (r14), we can do
6902 with a single temp register and an ordinary push / pop
6903 on the stack.
6904 - Grab any call-used or call-saved registers (i.e. not
6905 fixed or globals) for the temps we need. We might
6906 also grab r14 if we are adjusting the stack pointer.
6907 If we can't find enough available registers, issue
6908 a diagnostic and die - the user must have reserved
6909 way too many registers.
6910 But since all this is rather unlikely to happen and
6911 would require extra testing, we just die if r4 / r5
6912 are not available. */
6913 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6914 && !global_regs[4] && !global_regs[5]);
6916 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6917 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6918 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6919 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6920 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6921 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6922 emit_move_insn (mem, tmp_reg);
6923 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6924 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6925 emit_move_insn (mem, tmp_reg);
6926 emit_move_insn (reg, adj_reg);
6927 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6928 emit_move_insn (adj_reg, mem);
6929 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6930 emit_move_insn (tmp_reg, mem);
6931 /* Tell flow the insns that pop r4/r5 aren't dead. */
6932 emit_use (tmp_reg);
6933 emit_use (adj_reg);
6934 return;
6936 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6938 /* If SIZE is negative, subtract the positive value.
6939 This sometimes allows a constant pool entry to be shared
6940 between prologue and epilogue code. */
6941 if (size < 0)
6943 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6944 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6946 else
6948 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6949 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6951 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6952 gen_rtx_SET (VOIDmode, reg,
6953 gen_rtx_PLUS (SImode, reg,
6954 GEN_INT (size))));
6959 /* Emit the specified insn and mark it as frame related.
6960 FIXME: Rename this to emit_frame_insn. */
6961 static rtx_insn *
6962 frame_insn (rtx x)
6964 rtx_insn *insn = emit_insn (x);
6965 RTX_FRAME_RELATED_P (insn) = 1;
6966 return insn;
6969 /* Output RTL to push register RN onto the stack. */
6970 static rtx
6971 push (int rn)
6973 rtx x;
6974 if (rn == FPUL_REG)
6975 x = gen_push_fpul ();
6976 else if (rn == FPSCR_REG)
6977 x = gen_push_fpscr ();
6978 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
6979 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6981 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6982 return NULL_RTX;
6983 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6985 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6986 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6987 else
6988 x = gen_push (gen_rtx_REG (SImode, rn));
6990 x = frame_insn (x);
6991 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6992 return x;
6995 /* Output RTL to pop register RN from the stack. */
6996 static void
6997 pop (int rn)
6999 rtx x, sp_reg, reg;
7000 if (rn == FPUL_REG)
7001 x = gen_pop_fpul ();
7002 else if (rn == FPSCR_REG)
7003 x = gen_pop_fpscr ();
7004 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7005 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
7007 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
7008 return;
7009 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
7011 else if (TARGET_SH2E && FP_REGISTER_P (rn))
7012 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
7013 else
7014 x = gen_pop (gen_rtx_REG (SImode, rn));
7016 x = emit_insn (x);
7018 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7019 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
7020 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
7021 : SET_DEST (PATTERN (x)));
7022 add_reg_note (x, REG_CFA_RESTORE, reg);
7023 add_reg_note (x, REG_CFA_ADJUST_CFA,
7024 gen_rtx_SET (SImode, sp_reg,
7025 plus_constant (SImode, sp_reg,
7026 GET_MODE_SIZE (GET_MODE (reg)))));
7027 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
7028 RTX_FRAME_RELATED_P (x) = 1;
7031 /* Generate code to push the regs specified in the mask. */
7032 static void
7033 push_regs (HARD_REG_SET *mask, int interrupt_handler)
7035 int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
7036 int skip_fpscr = 0;
7038 /* Push PR last; this gives better latencies after the prologue, and
7039 candidates for the return delay slot when there are no general
7040 registers pushed. */
7041 for (; i < FIRST_PSEUDO_REGISTER; i++)
7043 /* If this is an interrupt handler, and the SZ bit varies,
7044 and we have to push any floating point register, we need
7045 to switch to the correct precision first. */
7046 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
7047 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
7049 HARD_REG_SET unsaved;
7051 push (FPSCR_REG);
7052 COMPL_HARD_REG_SET (unsaved, *mask);
7053 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
7054 skip_fpscr = 1;
7056 if (i != PR_REG
7057 && (i != FPSCR_REG || ! skip_fpscr)
7058 && TEST_HARD_REG_BIT (*mask, i))
7060 /* If the ISR has RESBANK attribute assigned, don't push any of
7061 the following registers - R0-R14, MACH, MACL and GBR. */
7062 if (! (sh_cfun_resbank_handler_p ()
7063 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
7064 || i == MACH_REG
7065 || i == MACL_REG
7066 || i == GBR_REG)))
7067 push (i);
7071 /* Push banked registers last to improve delay slot opportunities. */
7072 if (interrupt_handler)
7074 bool use_movml = false;
7076 if (TARGET_SH2A)
7078 unsigned int count = 0;
7080 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7081 if (TEST_HARD_REG_BIT (*mask, i))
7082 count++;
7083 else
7084 break;
7086 /* Use movml when all banked registers are pushed. */
7087 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7088 use_movml = true;
7091 if (sh_cfun_resbank_handler_p ())
7092 ; /* Do nothing. */
7093 else if (use_movml)
7095 rtx x, mem, reg, set;
7096 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7098 /* We must avoid scheduling other insns together with the
7099 multiple-store insn. */
7100 emit_insn (gen_blockage ());
7101 x = gen_movml_push_banked (sp_reg);
7102 x = frame_insn (x);
7103 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7105 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
7106 reg = gen_rtx_REG (SImode, i);
7107 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (SImode, mem, reg));
7110 set = gen_rtx_SET (SImode, sp_reg,
7111 plus_constant (Pmode, sp_reg, - 32));
7112 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
7113 emit_insn (gen_blockage ());
7115 else
7116 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7117 if (TEST_HARD_REG_BIT (*mask, i))
7118 push (i);
7121 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
7122 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
7123 push (PR_REG);
7126 /* Calculate how much extra space is needed to save all callee-saved
7127 target registers.
7128 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7129 static int
7130 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
7132 int reg;
7133 int stack_space = 0;
7134 int interrupt_handler = sh_cfun_interrupt_handler_p ();
7136 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7137 if ((! call_really_used_regs[reg] || interrupt_handler)
7138 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7139 /* Leave space to save this target register on the stack,
7140 in case target register allocation wants to use it. */
7141 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7142 return stack_space;
7145 /* Decide whether we should reserve space for callee-save target registers,
7146 in case target register allocation wants to use them. REGS_SAVED is
7147 the space, in bytes, that is already required for register saves.
7148 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7149 static int
7150 shmedia_reserve_space_for_target_registers_p (int regs_saved,
7151 HARD_REG_SET *live_regs_mask)
7153 if (optimize_size)
7154 return 0;
7155 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
7158 /* Decide how much space to reserve for callee-save target registers
7159 in case target register allocation wants to use them.
7160 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
7161 static int
7162 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
7164 if (shmedia_space_reserved_for_target_registers)
7165 return shmedia_target_regs_stack_space (live_regs_mask);
7166 else
7167 return 0;
7170 /* Work out the registers which need to be saved, both as a mask and a
7171 count of saved words. Return the count.
7173 If doing a pragma interrupt function, then push all regs used by the
7174 function, and if we call another function (we can tell by looking at PR),
7175 make sure that all the regs it clobbers are safe too. */
7176 static int
7177 calc_live_regs (HARD_REG_SET *live_regs_mask)
7179 unsigned int reg;
7180 int count;
7181 tree attrs;
7182 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7183 bool nosave_low_regs;
7184 int pr_live, has_call;
7186 attrs = DECL_ATTRIBUTES (current_function_decl);
7187 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7188 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7189 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7190 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7192 CLEAR_HARD_REG_SET (*live_regs_mask);
7193 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
7194 && df_regs_ever_live_p (FPSCR_REG))
7195 target_flags &= ~MASK_FPU_SINGLE;
7196 /* If switching to double mode avoids a lot of register saves, do that. */
7197 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD
7198 && TARGET_FPU_SINGLE)
7199 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7200 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7201 && (! call_really_used_regs[reg]
7202 || interrupt_handler)
7203 && ++count > 2)
7205 target_flags &= ~MASK_FPU_SINGLE;
7206 break;
7208 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
7209 knows how to use it. That means the pseudo originally allocated for
7210 the initial value can become the PR_MEDIA_REG hard register, as seen for
7211 execute/20010122-1.c:test9. */
7212 if (TARGET_SHMEDIA)
7213 /* ??? this function is called from initial_elimination_offset, hence we
7214 can't use the result of sh_media_register_for_return here. */
7215 pr_live = sh_pr_n_sets ();
7216 else
7218 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7219 pr_live = (pr_initial
7220 ? (!REG_P (pr_initial)
7221 || REGNO (pr_initial) != (PR_REG))
7222 : df_regs_ever_live_p (PR_REG));
7223 /* For Shcompact, if not optimizing, we end up with a memory reference
7224 using the return address pointer for __builtin_return_address even
7225 though there is no actual need to put the PR register on the stack. */
7226 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7228 /* Force PR to be live if the prologue has to call the SHmedia
7229 argument decoder or register saver. */
7230 if (TARGET_SHCOMPACT
7231 && ((crtl->args.info.call_cookie
7232 & ~ CALL_COOKIE_RET_TRAMP (1))
7233 || crtl->saves_all_registers))
7234 pr_live = 1;
7235 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
7236 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7238 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
7239 ? pr_live
7240 : interrupt_handler
7241 ? (/* Need to save all the regs ever live. */
7242 (df_regs_ever_live_p (reg)
7243 || (call_really_used_regs[reg]
7244 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7245 || reg == PIC_OFFSET_TABLE_REGNUM)
7246 && has_call)
7247 || (TARGET_SHMEDIA && has_call
7248 && REGISTER_NATURAL_MODE (reg) == SImode
7249 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
7250 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7251 && reg != RETURN_ADDRESS_POINTER_REGNUM
7252 && reg != T_REG && reg != GBR_REG
7253 /* Push fpscr only on targets which have FPU */
7254 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7255 : (/* Only push those regs which are used and need to be saved. */
7256 (TARGET_SHCOMPACT
7257 && flag_pic
7258 && crtl->args.info.call_cookie
7259 && reg == PIC_OFFSET_TABLE_REGNUM)
7260 || (df_regs_ever_live_p (reg)
7261 && ((!call_really_used_regs[reg]
7262 && !(reg != PIC_OFFSET_TABLE_REGNUM
7263 && fixed_regs[reg] && call_used_regs[reg]))
7264 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7265 || (crtl->calls_eh_return
7266 && (reg == EH_RETURN_DATA_REGNO (0)
7267 || reg == EH_RETURN_DATA_REGNO (1)
7268 || reg == EH_RETURN_DATA_REGNO (2)
7269 || reg == EH_RETURN_DATA_REGNO (3)))
7270 || ((reg == MACL_REG || reg == MACH_REG)
7271 && df_regs_ever_live_p (reg)
7272 && sh_cfun_attr_renesas_p ())
7275 SET_HARD_REG_BIT (*live_regs_mask, reg);
7276 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7278 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
7279 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7281 if (FP_REGISTER_P (reg))
7283 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7285 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7286 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7289 else if (XD_REGISTER_P (reg))
7291 /* Must switch to double mode to access these registers. */
7292 target_flags &= ~MASK_FPU_SINGLE;
7296 if (nosave_low_regs && reg == R8_REG)
7297 break;
7299 /* If we have a target register optimization pass after prologue / epilogue
7300 threading, we need to assume all target registers will be live even if
7301 they aren't now. */
7302 if (flag_branch_target_load_optimize2
7303 && TARGET_SAVE_ALL_TARGET_REGS
7304 && shmedia_space_reserved_for_target_registers)
7305 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
7306 if ((! call_really_used_regs[reg] || interrupt_handler)
7307 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
7309 SET_HARD_REG_BIT (*live_regs_mask, reg);
7310 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7312 /* If this is an interrupt handler, we don't have any call-clobbered
7313 registers we can conveniently use for target register save/restore.
7314 Make sure we save at least one general purpose register when we need
7315 to save target registers. */
7316 if (interrupt_handler
7317 && hard_reg_set_intersect_p (*live_regs_mask,
7318 reg_class_contents[TARGET_REGS])
7319 && ! hard_reg_set_intersect_p (*live_regs_mask,
7320 reg_class_contents[GENERAL_REGS]))
7322 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
7323 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
7326 return count;
7329 /* Code to generate prologue and epilogue sequences */
7331 /* PUSHED is the number of bytes that are being pushed on the
7332 stack for register saves. Return the frame size, padded
7333 appropriately so that the stack stays properly aligned. */
7334 static HOST_WIDE_INT
7335 rounded_frame_size (int pushed)
7337 HOST_WIDE_INT size = get_frame_size ();
7338 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7340 if (ACCUMULATE_OUTGOING_ARGS)
7341 size += crtl->outgoing_args_size;
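/* For example, with SIZE == 20 at this point, PUSHED == 12 and a 4-byte
   boundary: ((20 + 12 + 3) & -4) - 12 == 20, so the frame gets another
   20 bytes and the 32-byte total stays aligned.  */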
7343 return ((size + pushed + align - 1) & -align) - pushed;
7346 /* Choose a call-clobbered target-branch register that remains
7347 unchanged along the whole function. We set it up as the return
7348 value in the prologue. */
7349 int
7350 sh_media_register_for_return (void)
7352 int regno;
7353 int tr0_used;
7355 if (! crtl->is_leaf)
7356 return -1;
7357 if (lookup_attribute ("interrupt_handler",
7358 DECL_ATTRIBUTES (current_function_decl)))
7359 return -1;
7360 if (sh_cfun_interrupt_handler_p ())
7361 return -1;
7363 tr0_used = flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM);
7365 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
7366 if (call_really_used_regs[regno] && ! df_regs_ever_live_p (regno))
7367 return regno;
7369 return -1;
7372 /* The maximum registers we need to save are:
7373 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
7374 - 32 floating point registers (for each pair, we save none,
7375 one single precision value, or a double precision value).
7376 - 8 target registers
7377 - add 1 entry for a delimiter. */
7378 #define MAX_SAVED_REGS (62+32+8)
7380 typedef struct save_entry_s
7382 unsigned char reg;
7383 unsigned char mode;
7384 short offset;
7385 } save_entry;
7387 #define MAX_TEMPS 4
7389 /* There will be a delimiter entry with VOIDmode both at the start and the
7390 end of a filled in schedule. The end delimiter has the offset of the
7391 save with the smallest (i.e. most negative) offset. */
7392 typedef struct save_schedule_s
7394 save_entry entries[MAX_SAVED_REGS + 2];
7395 int temps[MAX_TEMPS+1];
7396 } save_schedule;
7398 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
7399 use reverse order. Returns the last entry written to (not counting
7400 the delimiter). OFFSET_BASE is a number to be added to all offset
7401 entries. */
7402 static save_entry *
7403 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
7404 int offset_base)
7406 int align, i;
7407 save_entry *entry = schedule->entries;
7408 int tmpx = 0;
7409 int offset;
7411 if (! current_function_interrupt)
7412 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
7413 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
7414 && ! FUNCTION_ARG_REGNO_P (i)
7415 && i != FIRST_RET_REG
7416 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
7417 && ! (crtl->calls_eh_return
7418 && (i == EH_RETURN_STACKADJ_REGNO
7419 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
7420 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
7421 schedule->temps[tmpx++] = i;
7422 entry->reg = -1;
7423 entry->mode = VOIDmode;
7424 entry->offset = offset_base;
7425 entry++;
7426 /* We loop twice: first, we save 8-byte aligned registers in the
7427 higher addresses, that are known to be aligned. Then, we
7428 proceed to saving 32-bit registers that don't need 8-byte
7429 alignment.
7430 If this is an interrupt function, all registers that need saving
7431 need to be saved in full.  Moreover, we need to postpone saving
7432 target registers until we have saved some general purpose registers
7433 we can then use as scratch registers. */
7434 offset = offset_base;
7435 for (align = 1; align >= 0; align--)
7437 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
7438 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7440 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
7441 int reg = i;
7443 if (current_function_interrupt)
7445 if (TARGET_REGISTER_P (i))
7446 continue;
7447 if (GENERAL_REGISTER_P (i))
7448 mode = DImode;
7450 if (mode == SFmode && (i % 2) == 1
7451 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
7452 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
7454 mode = DFmode;
7455 i--;
7456 reg--;
7459 /* If we're doing the aligned pass and this is not aligned,
7460 or we're doing the unaligned pass and this is aligned,
7461 skip it. */
7462 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
7463 != align)
7464 continue;
7466 if (current_function_interrupt
7467 && GENERAL_REGISTER_P (i)
7468 && tmpx < MAX_TEMPS)
7469 schedule->temps[tmpx++] = i;
7471 offset -= GET_MODE_SIZE (mode);
7472 entry->reg = i;
7473 entry->mode = mode;
7474 entry->offset = offset;
7475 entry++;
7477 if (align && current_function_interrupt)
7478 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
7479 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
7481 offset -= GET_MODE_SIZE (DImode);
7482 entry->reg = i;
7483 entry->mode = DImode;
7484 entry->offset = offset;
7485 entry++;
7488 entry->reg = -1;
7489 entry->mode = VOIDmode;
7490 entry->offset = offset;
7491 schedule->temps[tmpx] = -1;
7492 return entry - 1;
7495 /* Expand code for the function prologue. */
7496 void
7497 sh_expand_prologue (void)
7499 HARD_REG_SET live_regs_mask;
7500 int d, i;
7501 int d_rounding = 0;
7502 int save_flags = target_flags;
7503 int pretend_args;
7504 int stack_usage;
7505 tree sp_switch_attr
7506 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7508 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7510 /* We have pretend args if we had an object sent partially in registers
7511 and partially on the stack, e.g. a large structure. */
7512 pretend_args = crtl->args.pretend_args_size;
7513 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7514 && (NPARM_REGS(SImode)
7515 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7516 pretend_args = 0;
7518 output_stack_adjust (-pretend_args
7519 - crtl->args.info.stack_regs * 8,
7520 stack_pointer_rtx, 0, NULL, true);
7521 stack_usage = pretend_args + crtl->args.info.stack_regs * 8;
7523 if (TARGET_SHCOMPACT && flag_pic && crtl->args.info.call_cookie)
7524 /* We're going to use the PIC register to load the address of the
7525 incoming-argument decoder and/or of the return trampoline from
7526 the GOT, so make sure the PIC register is preserved and
7527 initialized. */
7528 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7530 if (TARGET_SHCOMPACT
7531 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7533 int reg;
7535 /* First, make all registers with incoming arguments that will
7536 be pushed onto the stack live, so that register renaming
7537 doesn't overwrite them. */
7538 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
7539 if (CALL_COOKIE_STACKSEQ_GET (crtl->args.info.call_cookie)
7540 >= NPARM_REGS (SImode) - reg)
7541 for (; reg < NPARM_REGS (SImode); reg++)
7542 emit_insn (gen_shcompact_preserve_incoming_args
7543 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7544 else if (CALL_COOKIE_INT_REG_GET
7545 (crtl->args.info.call_cookie, reg) == 1)
7546 emit_insn (gen_shcompact_preserve_incoming_args
7547 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
7549 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
7550 stack_pointer_rtx);
7551 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
7552 GEN_INT (crtl->args.info.call_cookie));
7553 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
7554 gen_rtx_REG (SImode, R0_REG));
7556 else if (TARGET_SHMEDIA)
7558 int tr = sh_media_register_for_return ();
7560 if (tr >= 0)
7561 emit_move_insn (gen_rtx_REG (DImode, tr),
7562 gen_rtx_REG (DImode, PR_MEDIA_REG));
7565 /* Emit the code for SETUP_VARARGS. */
7566 if (cfun->stdarg)
7568 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7570 /* Push arg regs as if they'd been provided by the caller on the stack.  */
7571 for (i = 0; i < NPARM_REGS(SImode); i++)
7573 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7575 if (i >= (NPARM_REGS(SImode)
7576 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7578 break;
7579 push (rn);
7580 stack_usage += GET_MODE_SIZE (SImode);
7585 /* If we're supposed to switch stacks at function entry, do so now. */
7586 if (sp_switch_attr)
7588 rtx lab, newsrc;
7589 /* The argument specifies a variable holding the address of the
7590 stack the interrupt function should switch to/from at entry/exit. */
7591 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7592 const char *s
7593 = ggc_strdup (TREE_STRING_POINTER (arg));
7594 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7596 lab = add_constant (sp_switch, SImode, 0);
7597 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7599 emit_insn (gen_sp_switch_1 (newsrc));
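/* Illustrative source-level use of the attribute handled above, with
   "alt_stack" as a hypothetical variable name:
     void *alt_stack;
     void handler (void) __attribute__ ((interrupt_handler,
                                          sp_switch ("alt_stack")));  */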
7602 d = calc_live_regs (&live_regs_mask);
7603 /* ??? Maybe we could save some switching if we can move a mode switch
7604 that already happens to be at the function start into the prologue. */
7605 if (target_flags != save_flags && ! current_function_interrupt)
7606 emit_insn (gen_toggle_sz ());
7608 if (TARGET_SH5)
7610 int offset_base, offset;
7611 rtx r0 = NULL_RTX;
7612 int offset_in_r0 = -1;
7613 int sp_in_r0 = 0;
7614 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7615 int total_size, save_size;
7616 save_schedule schedule;
7617 save_entry *entry;
7618 int *tmp_pnt;
7620 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
7621 && ! current_function_interrupt)
7622 r0 = gen_rtx_REG (Pmode, R0_REG);
7624 /* D is the actual number of bytes that we need for saving registers;
7625 however, in initial_elimination_offset we have committed to using
7626 an additional TREGS_SPACE amount of bytes - in order to keep both
7627 addresses to arguments supplied by the caller and local variables
7628 valid, we must keep this gap. Place it between the incoming
7629 arguments and the actually saved registers in a bid to optimize
7630 locality of reference. */
7631 total_size = d + tregs_space;
7632 total_size += rounded_frame_size (total_size);
7633 save_size = total_size - rounded_frame_size (d);
7634 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
7635 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7636 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
7638 /* If adjusting the stack in a single step costs nothing extra, do so.
7639 I.e. either if a single addi is enough, or we need a movi anyway,
7640 and we don't exceed the maximum offset range (the test for the
7641 latter is conservative for simplicity). */
7642 if (TARGET_SHMEDIA
7643 && (CONST_OK_FOR_I10 (-total_size)
7644 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
7645 && total_size <= 2044)))
7646 d_rounding = total_size - save_size;
7648 offset_base = d + d_rounding;
7650 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
7651 0, NULL, true);
7652 stack_usage += save_size + d_rounding;
7654 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
7655 tmp_pnt = schedule.temps;
7656 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7658 enum machine_mode mode = (enum machine_mode) entry->mode;
7659 unsigned int reg = entry->reg;
7660 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
7661 rtx orig_reg_rtx;
7663 offset = entry->offset;
7665 reg_rtx = gen_rtx_REG (mode, reg);
7667 mem_rtx = gen_frame_mem (mode,
7668 gen_rtx_PLUS (Pmode,
7669 stack_pointer_rtx,
7670 GEN_INT (offset)));
7672 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7674 gcc_assert (r0);
7675 mem_rtx = NULL_RTX;
7678 if (HAVE_PRE_DECREMENT
7679 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
7680 || mem_rtx == NULL_RTX
7681 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7683 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
7685 if (!memory_address_p (mode, XEXP (pre_dec, 0)))
7686 pre_dec = NULL_RTX;
7687 else
7689 mem_rtx = NULL_RTX;
7690 offset += GET_MODE_SIZE (mode);
7694 if (mem_rtx != NULL_RTX)
7695 goto addr_ok;
7697 if (offset_in_r0 == -1)
7699 emit_move_insn (r0, GEN_INT (offset));
7700 offset_in_r0 = offset;
7702 else if (offset != offset_in_r0)
7704 emit_move_insn (r0,
7705 gen_rtx_PLUS
7706 (Pmode, r0,
7707 GEN_INT (offset - offset_in_r0)));
7708 offset_in_r0 += offset - offset_in_r0;
7711 if (pre_dec != NULL_RTX)
7713 if (! sp_in_r0)
7715 emit_move_insn (r0,
7716 gen_rtx_PLUS
7717 (Pmode, r0, stack_pointer_rtx));
7718 sp_in_r0 = 1;
7721 offset -= GET_MODE_SIZE (mode);
7722 offset_in_r0 -= GET_MODE_SIZE (mode);
7724 mem_rtx = pre_dec;
7726 else if (sp_in_r0)
7727 mem_rtx = gen_frame_mem (mode, r0);
7728 else
7729 mem_rtx = gen_frame_mem (mode,
7730 gen_rtx_PLUS (Pmode,
7731 stack_pointer_rtx,
7732 r0));
7734 /* We must not use an r0-based address for target-branch
7735 registers or for special registers without pre-dec
7736 memory addresses, since we store their values in r0
7737 first. */
7738 gcc_assert (!TARGET_REGISTER_P (reg)
7739 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
7740 || mem_rtx == pre_dec));
7742 addr_ok:
7743 orig_reg_rtx = reg_rtx;
7744 if (TARGET_REGISTER_P (reg)
7745 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
7746 && mem_rtx != pre_dec))
7748 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
7750 emit_move_insn (tmp_reg, reg_rtx);
7752 if (REGNO (tmp_reg) == R0_REG)
7754 offset_in_r0 = -1;
7755 sp_in_r0 = 0;
7756 gcc_assert (!refers_to_regno_p
7757 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
7760 if (*++tmp_pnt <= 0)
7761 tmp_pnt = schedule.temps;
7763 reg_rtx = tmp_reg;
7766 rtx insn;
7768 /* Mark as interesting for the DWARF CFI generator.  */
7769 insn = emit_move_insn (mem_rtx, reg_rtx);
7770 RTX_FRAME_RELATED_P (insn) = 1;
7771 /* If we use an intermediate register for the save, we can't
7772 describe this exactly in cfi as a copy of the to-be-saved
7773 register into the temporary register followed by a save of the
7774 temporary register to the stack, because the temporary register can
7775 have a different natural size than the to-be-saved register.
7776 Thus, we gloss over the intermediate copy and pretend we do
7777 a direct save from the to-be-saved register. */
7778 if (REGNO (reg_rtx) != reg)
7780 rtx set;
7782 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
7783 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7786 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
7788 rtx reg_rtx = gen_rtx_REG (mode, reg);
7789 rtx set;
7790 rtx mem_rtx = gen_frame_mem (mode,
7791 gen_rtx_PLUS (Pmode,
7792 stack_pointer_rtx,
7793 GEN_INT (offset)));
7795 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
7796 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
7801 gcc_assert (entry->offset == d_rounding);
7803 else
7805 push_regs (&live_regs_mask, current_function_interrupt);
7806 stack_usage += d;
7809 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7810 emit_insn (gen_GOTaddr2picreg ());
7812 if (SHMEDIA_REGS_STACK_ADJUST ())
7814 /* This must NOT go through the PLT, otherwise mach and macl
7815 may be clobbered. */
7816 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7817 (TARGET_FPU_ANY
7818 ? "__GCC_push_shmedia_regs"
7819 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
7820 emit_insn (gen_shmedia_save_restore_regs_compact
7821 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
7824 if (target_flags != save_flags && ! current_function_interrupt)
7825 emit_insn (gen_toggle_sz ());
7827 target_flags = save_flags;
7829 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
7830 stack_pointer_rtx, 0, NULL, true);
7831 stack_usage += rounded_frame_size (d) - d_rounding;
7833 if (frame_pointer_needed)
7834 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7836 if (TARGET_SHCOMPACT
7837 && (crtl->args.info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
7839 /* This must NOT go through the PLT, otherwise mach and macl
7840 may be clobbered. */
7841 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7842 "__GCC_shcompact_incoming_args", SFUNC_GOT);
7843 emit_insn (gen_shcompact_incoming_args ());
7846 /* If we are profiling, make sure no instructions are scheduled before
7847 the call to mcount. Similarly if some call instructions are swapped
7848 before frame related insns, it'll confuse the unwinder because
7849 currently SH has no unwind info for function epilogues. */
7850 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7851 emit_insn (gen_blockage ());
7853 if (flag_stack_usage_info)
7854 current_function_static_stack_size = stack_usage;
7857 /* Expand code for the function epilogue. */
7858 void
7859 sh_expand_epilogue (bool sibcall_p)
7861 HARD_REG_SET live_regs_mask;
7862 int d, i;
7863 int d_rounding = 0;
7865 int save_flags = target_flags;
7866 int frame_size, save_size;
7867 int fpscr_deferred = 0;
7868 int e = sibcall_p ? -1 : 1;
7870 d = calc_live_regs (&live_regs_mask);
7872 save_size = d;
7873 frame_size = rounded_frame_size (d);
7875 if (TARGET_SH5)
7877 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
7878 int total_size;
7879 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
7880 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7881 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
7883 total_size = d + tregs_space;
7884 total_size += rounded_frame_size (total_size);
7885 save_size = total_size - frame_size;
7887 /* If adjusting the stack in a single step costs nothing extra, do so.
7888 I.e. either if a single addi is enough, or we need a movi anyway,
7889 and we don't exceed the maximum offset range (the test for the
7890 latter is conservative for simplicity). */
7891 if (TARGET_SHMEDIA
7892 && ! frame_pointer_needed
7893 && (CONST_OK_FOR_I10 (total_size)
7894 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
7895 && total_size <= 2044)))
7896 d_rounding = frame_size;
7898 frame_size -= d_rounding;
7901 if (frame_pointer_needed)
7903 /* We must avoid scheduling the epilogue with previous basic blocks.
7904 See PR/18032 and PR/40313. */
7905 emit_insn (gen_blockage ());
7906 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7907 &live_regs_mask, true);
7909 /* We must avoid moving the stack pointer adjustment past code
7910 which reads from the local frame, else an interrupt could
7911 occur after the SP adjustment and clobber data in the local
7912 frame. */
7913 emit_insn (gen_blockage ());
7914 frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7916 else if (frame_size)
7918 /* We must avoid moving the stack pointer adjustment past code
7919 which reads from the local frame, else an interrupt could
7920 occur after the SP adjustment and clobber data in the local
7921 frame. */
7922 emit_insn (gen_blockage ());
7923 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7924 &live_regs_mask, true);
7927 if (SHMEDIA_REGS_STACK_ADJUST ())
7929 function_symbol (gen_rtx_REG (Pmode, R0_REG),
7930 (TARGET_FPU_ANY
7931 ? "__GCC_pop_shmedia_regs"
7932 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
7933 /* This must NOT go through the PLT, otherwise mach and macl
7934 may be clobbered. */
7935 emit_insn (gen_shmedia_save_restore_regs_compact
7936 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
7939 /* Pop all the registers. */
7941 if (target_flags != save_flags && ! current_function_interrupt)
7942 emit_insn (gen_toggle_sz ());
7943 if (TARGET_SH5)
7945 int offset_base, offset;
7946 int offset_in_r0 = -1;
7947 int sp_in_r0 = 0;
7948 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
7949 save_schedule schedule;
7950 save_entry *entry;
7951 int *tmp_pnt;
7953 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
7954 offset_base = -entry[1].offset + d_rounding;
7955 tmp_pnt = schedule.temps;
7956 for (; entry->mode != VOIDmode; entry--)
7958 enum machine_mode mode = (enum machine_mode) entry->mode;
7959 int reg = entry->reg;
7960 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX;
7962 offset = offset_base + entry->offset;
7963 reg_rtx = gen_rtx_REG (mode, reg);
7965 mem_rtx = gen_frame_mem (mode,
7966 gen_rtx_PLUS (Pmode,
7967 stack_pointer_rtx,
7968 GEN_INT (offset)));
7970 if (!memory_address_p (mode, XEXP (mem_rtx, 0)))
7971 mem_rtx = NULL_RTX;
7973 if (HAVE_POST_INCREMENT
7974 && (offset == offset_in_r0
7975 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
7976 && mem_rtx == NULL_RTX)
7977 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
7979 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
7981 if (!memory_address_p (mode, XEXP (post_inc, 0)))
7982 post_inc = NULL_RTX;
7983 else
7984 mem_rtx = NULL_RTX;
7987 if (mem_rtx != NULL_RTX)
7988 goto addr_ok;
7990 if (offset_in_r0 == -1)
7992 emit_move_insn (r0, GEN_INT (offset));
7993 offset_in_r0 = offset;
7995 else if (offset != offset_in_r0)
7997 emit_move_insn (r0,
7998 gen_rtx_PLUS
7999 (Pmode, r0,
8000 GEN_INT (offset - offset_in_r0)));
8001 offset_in_r0 += offset - offset_in_r0;
8004 if (post_inc != NULL_RTX)
8006 if (! sp_in_r0)
8008 emit_move_insn (r0,
8009 gen_rtx_PLUS
8010 (Pmode, r0, stack_pointer_rtx));
8011 sp_in_r0 = 1;
8014 mem_rtx = post_inc;
8016 offset_in_r0 += GET_MODE_SIZE (mode);
8018 else if (sp_in_r0)
8019 mem_rtx = gen_frame_mem (mode, r0);
8020 else
8021 mem_rtx = gen_frame_mem (mode,
8022 gen_rtx_PLUS (Pmode,
8023 stack_pointer_rtx,
8024 r0));
8026 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
8027 || mem_rtx == post_inc);
8029 addr_ok:
8030 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
8031 && mem_rtx != post_inc)
8033 emit_move_insn (r0, mem_rtx);
8034 mem_rtx = r0;
8036 else if (TARGET_REGISTER_P (reg))
8038 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
8040 /* Give the scheduler a bit of freedom by using up to
8041 MAX_TEMPS registers in a round-robin fashion. */
8042 emit_move_insn (tmp_reg, mem_rtx);
8043 mem_rtx = tmp_reg;
8044 if (*++tmp_pnt < 0)
8045 tmp_pnt = schedule.temps;
8048 emit_move_insn (reg_rtx, mem_rtx);
8051 gcc_assert (entry->offset + offset_base == d + d_rounding);
8053 else /* ! TARGET_SH5 */
8055 int last_reg;
8057 save_size = 0;
8058 /* For an ISR with RESBANK attribute assigned, don't pop the PR
8059 register. */
8060 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
8061 && !sh_cfun_resbank_handler_p ())
8063 if (!frame_pointer_needed)
8064 emit_insn (gen_blockage ());
8065 pop (PR_REG);
8068 /* Banked registers are popped first to avoid being scheduled in the
8069 delay slot. RTE switches banks before the ds instruction. */
8070 if (current_function_interrupt)
8072 bool use_movml = false;
8074 if (TARGET_SH2A)
8076 unsigned int count = 0;
8078 for (i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
8079 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8080 count++;
8081 else
8082 break;
8084 /* Use movml when all banked registers are popped.  */
8085 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
8086 use_movml = true;
8089 if (sh_cfun_resbank_handler_p ())
8090 ; /* Do nothing. */
8091 else if (use_movml)
8093 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
8095 /* We must avoid scheduling the multiple load insn together with
8096 other insns.  */
8097 emit_insn (gen_blockage ());
8098 emit_insn (gen_movml_pop_banked (sp_reg));
8099 emit_insn (gen_blockage ());
8101 else
8102 for (i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
8103 if (TEST_HARD_REG_BIT (live_regs_mask, i))
8104 pop (i);
8106 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
8108 else
8109 last_reg = FIRST_PSEUDO_REGISTER;
8111 for (i = 0; i < last_reg; i++)
8113 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
8115 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
8116 && hard_reg_set_intersect_p (live_regs_mask,
8117 reg_class_contents[DF_REGS]))
8118 fpscr_deferred = 1;
8119 /* For an ISR with RESBANK attribute assigned, don't pop the
8120 following registers: R0-R14, MACH, MACL and GBR.  */
8121 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
8122 && ! (sh_cfun_resbank_handler_p ()
8123 && ((j >= FIRST_GENERAL_REG
8124 && j < LAST_GENERAL_REG)
8125 || j == MACH_REG
8126 || j == MACL_REG
8127 || j == GBR_REG)))
8128 pop (j);
8130 if (j == FIRST_FP_REG && fpscr_deferred)
8131 pop (FPSCR_REG);
8134 if (target_flags != save_flags && ! current_function_interrupt)
8135 emit_insn (gen_toggle_sz ());
8136 target_flags = save_flags;
8138 output_stack_adjust (crtl->args.pretend_args_size
8139 + save_size + d_rounding
8140 + crtl->args.info.stack_regs * 8,
8141 stack_pointer_rtx, e, NULL, true);
8143 if (crtl->calls_eh_return)
8144 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
8145 EH_RETURN_STACKADJ_RTX));
8147 /* Switch back to the normal stack if necessary. */
8148 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
8149 emit_insn (gen_sp_switch_2 ());
8151 /* Tell flow the insn that pops PR isn't dead. */
8152 /* PR_REG will never be live in SHmedia mode, and we don't need to
8153 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
8154 by the return pattern. */
8155 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
8156 emit_use (gen_rtx_REG (SImode, PR_REG));
8159 /* Emit code to change the current function's return address to RA.
8160 TEMP is available as a scratch register, if needed. */
8161 void
8162 sh_set_return_address (rtx ra, rtx tmp)
8164 HARD_REG_SET live_regs_mask;
8165 int d;
8166 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
8167 int pr_offset;
8169 d = calc_live_regs (&live_regs_mask);
8171 /* If pr_reg isn't live, we can set it (or the register given in
8172 sh_media_register_for_return) directly. */
8173 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
8175 rtx rr;
8177 if (TARGET_SHMEDIA)
8179 int rr_regno = sh_media_register_for_return ();
8181 if (rr_regno < 0)
8182 rr_regno = pr_reg;
8184 rr = gen_rtx_REG (DImode, rr_regno);
8186 else
8187 rr = gen_rtx_REG (SImode, pr_reg);
8189 emit_insn (GEN_MOV (rr, ra));
8190 /* Tell flow the register for return isn't dead. */
8191 emit_use (rr);
8192 return;
8195 if (TARGET_SH5)
8197 int offset;
8198 save_schedule schedule;
8199 save_entry *entry;
8201 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
8202 offset = entry[1].offset;
8203 for (; entry->mode != VOIDmode; entry--)
8204 if (entry->reg == pr_reg)
8205 goto found;
8207 /* We couldn't find the PR register.  */
8208 gcc_unreachable ();
8210 found:
8211 offset = entry->offset - offset;
8212 pr_offset = (rounded_frame_size (d) + offset
8213 + SHMEDIA_REGS_STACK_ADJUST ());
8215 else
8216 pr_offset = rounded_frame_size (d);
8218 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
8220 if (frame_pointer_needed)
8221 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
8222 else
8223 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
8225 tmp = gen_frame_mem (Pmode, tmp);
8226 emit_insn (GEN_MOV (tmp, ra));
8227 /* Tell flow this store isn't dead.  */
8228 emit_use (tmp);
8231 /* Clear variables at function end. */
8232 static void
8233 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8234 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8238 static rtx
8239 sh_builtin_saveregs (void)
8241 /* First unnamed integer register. */
8242 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
8243 /* Number of integer registers we need to save. */
8244 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
8245 /* First unnamed SFmode float reg.  */
8246 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
8247 /* Number of SFmode float regs to save. */
8248 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
8249 rtx regbuf, fpregs;
8250 int bufsize, regno;
8251 alias_set_type alias_set;
8253 if (TARGET_SH5)
8255 if (n_intregs)
8257 int pushregs = n_intregs;
8259 while (pushregs < NPARM_REGS (SImode) - 1
8260 && (CALL_COOKIE_INT_REG_GET
8261 (crtl->args.info.call_cookie,
8262 NPARM_REGS (SImode) - pushregs)
8263 == 1))
8265 crtl->args.info.call_cookie
8266 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
8267 - pushregs, 1);
8268 pushregs++;
8271 if (pushregs == NPARM_REGS (SImode))
8272 crtl->args.info.call_cookie
8273 |= (CALL_COOKIE_INT_REG (0, 1)
8274 | CALL_COOKIE_STACKSEQ (pushregs - 1));
8275 else
8276 crtl->args.info.call_cookie
8277 |= CALL_COOKIE_STACKSEQ (pushregs);
8279 crtl->args.pretend_args_size += 8 * n_intregs;
8281 if (TARGET_SHCOMPACT)
8282 return const0_rtx;
8285 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
8287 error ("__builtin_saveregs not supported by this subtarget");
8288 return const0_rtx;
8291 if (TARGET_SHMEDIA)
8292 n_floatregs = 0;
8294 /* Allocate block of memory for the regs. */
8295 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
8296 Or can assign_stack_local accept a 0 SIZE argument? */
8297 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
8299 if (TARGET_SHMEDIA)
8300 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
8301 else if (n_floatregs & 1)
8303 rtx addr;
8305 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8306 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
8307 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
8308 regbuf = change_address (regbuf, BLKmode, addr);
8310 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
8312 rtx addr, mask;
8314 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
8315 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
8316 XEXP (regbuf, 0), 4));
8317 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
8318 emit_insn (gen_andsi3 (addr, addr, mask));
8319 regbuf = change_address (regbuf, BLKmode, addr);
8321 else
8322 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
8323 alias_set = get_varargs_alias_set ();
8324 set_mem_alias_set (regbuf, alias_set);
8326 /* Save int args.
8327 This is optimized to only save the regs that are necessary. Explicitly
8328 named args need not be saved. */
8329 if (n_intregs > 0)
8330 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
8331 adjust_address (regbuf, BLKmode,
8332 n_floatregs * UNITS_PER_WORD),
8333 n_intregs);
8335 if (TARGET_SHMEDIA)
8336 /* Return the address of the regbuf. */
8337 return XEXP (regbuf, 0);
8339 /* Save float args.
8340 This is optimized to only save the regs that are necessary. Explicitly
8341 named args need not be saved.
8342 We explicitly build a pointer to the buffer because it halves the insn
8343 count when not optimizing (otherwise the pointer is built for each reg
8344 saved).
8345 We emit the moves in reverse order so that we can use predecrement. */
8347 fpregs = copy_to_mode_reg (Pmode,
8348 plus_constant (Pmode, XEXP (regbuf, 0),
8349 n_floatregs * UNITS_PER_WORD));
8350 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8352 rtx mem;
8353 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
8355 emit_insn (gen_addsi3 (fpregs, fpregs,
8356 GEN_INT (-2 * UNITS_PER_WORD)));
8357 mem = change_address (regbuf, DFmode, fpregs);
8358 emit_move_insn (mem,
8359 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
8361 regno = first_floatreg;
8362 if (regno & 1)
8364 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8365 mem = change_address (regbuf, SFmode, fpregs);
8366 emit_move_insn (mem,
8367 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
8368 + regno - SH_REG_MSW_OFFSET));
8371 else
8372 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
8374 rtx mem;
8376 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
8377 mem = change_address (regbuf, SFmode, fpregs);
8378 emit_move_insn (mem,
8379 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
8382 /* Return the address of the regbuf. */
8383 return XEXP (regbuf, 0);
8386 /* Define the `__builtin_va_list' type for the ABI. */
8387 static tree
8388 sh_build_builtin_va_list (void)
8390 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8391 tree record, type_decl;
8393 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
8394 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8395 return ptr_type_node;
8397 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
8398 type_decl = build_decl (BUILTINS_LOCATION,
8399 TYPE_DECL, get_identifier ("__va_list_tag"), record);
8401 f_next_o = build_decl (BUILTINS_LOCATION,
8402 FIELD_DECL, get_identifier ("__va_next_o"),
8403 ptr_type_node);
8404 f_next_o_limit = build_decl (BUILTINS_LOCATION,
8405 FIELD_DECL,
8406 get_identifier ("__va_next_o_limit"),
8407 ptr_type_node);
8408 f_next_fp = build_decl (BUILTINS_LOCATION,
8409 FIELD_DECL, get_identifier ("__va_next_fp"),
8410 ptr_type_node);
8411 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
8412 FIELD_DECL,
8413 get_identifier ("__va_next_fp_limit"),
8414 ptr_type_node);
8415 f_next_stack = build_decl (BUILTINS_LOCATION,
8416 FIELD_DECL, get_identifier ("__va_next_stack"),
8417 ptr_type_node);
8419 DECL_FIELD_CONTEXT (f_next_o) = record;
8420 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
8421 DECL_FIELD_CONTEXT (f_next_fp) = record;
8422 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
8423 DECL_FIELD_CONTEXT (f_next_stack) = record;
8425 TYPE_STUB_DECL (record) = type_decl;
8426 TYPE_NAME (record) = type_decl;
8427 TYPE_FIELDS (record) = f_next_o;
8428 DECL_CHAIN (f_next_o) = f_next_o_limit;
8429 DECL_CHAIN (f_next_o_limit) = f_next_fp;
8430 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
8431 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
8433 layout_type (record);
8435 return record;
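/* For illustration, the record built above corresponds roughly to this C
   declaration (field names as created above):
     struct __va_list_tag {
       void *__va_next_o, *__va_next_o_limit;
       void *__va_next_fp, *__va_next_fp_limit;
       void *__va_next_stack;
     };  */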
8438 /* Implement `va_start' for varargs and stdarg. */
8439 static void
8440 sh_va_start (tree valist, rtx nextarg)
8442 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8443 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8444 tree t, u;
8445 int nfp, nint;
8447 if (TARGET_SH5)
8449 expand_builtin_saveregs ();
8450 std_expand_builtin_va_start (valist, nextarg);
8451 return;
8454 if ((! TARGET_SH2E && ! TARGET_SH4)
8455 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
8457 std_expand_builtin_va_start (valist, nextarg);
8458 return;
8461 f_next_o = TYPE_FIELDS (va_list_type_node);
8462 f_next_o_limit = DECL_CHAIN (f_next_o);
8463 f_next_fp = DECL_CHAIN (f_next_o_limit);
8464 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8465 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8467 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8468 NULL_TREE);
8469 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8470 valist, f_next_o_limit, NULL_TREE);
8471 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
8472 NULL_TREE);
8473 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8474 valist, f_next_fp_limit, NULL_TREE);
8475 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8476 valist, f_next_stack, NULL_TREE);
8478 /* Call __builtin_saveregs. */
8479 u = make_tree (sizetype, expand_builtin_saveregs ());
8480 u = fold_convert (ptr_type_node, u);
8481 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
8482 TREE_SIDE_EFFECTS (t) = 1;
8483 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8485 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
8486 if (nfp < 8)
8487 nfp = 8 - nfp;
8488 else
8489 nfp = 0;
8490 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
8491 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
8492 TREE_SIDE_EFFECTS (t) = 1;
8493 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8495 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
8496 TREE_SIDE_EFFECTS (t) = 1;
8497 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8499 nint = crtl->args.info.arg_count[SH_ARG_INT];
8500 if (nint < 4)
8501 nint = 4 - nint;
8502 else
8503 nint = 0;
8504 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
8505 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
8506 TREE_SIDE_EFFECTS (t) = 1;
8507 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
8509 u = make_tree (ptr_type_node, nextarg);
8510 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
8511 TREE_SIDE_EFFECTS (t) = 1;
8512 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
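/* Illustrative example (hypothetical prototype): for
     int f (float a, int b, ...);
   one FP and one integer argument register are named, so nfp == 8 - 1 == 7
   and nint == 4 - 1 == 3 above; __va_next_fp_limit then lies 7 words past
   the start of the register save area, __va_next_o starts at that same
   point, and __va_next_o_limit lies another 3 words further on.  */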
8515 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
8516 member, return it. */
8517 static tree
8518 find_sole_member (tree type)
8520 tree field, member = NULL_TREE;
8522 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
8524 if (TREE_CODE (field) != FIELD_DECL)
8525 continue;
8526 if (!DECL_SIZE (field))
8527 return NULL_TREE;
8528 if (integer_zerop (DECL_SIZE (field)))
8529 continue;
8530 if (member)
8531 return NULL_TREE;
8532 member = field;
8534 return member;
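/* Illustrative example (hypothetical types):
     struct A { float x; };           returns the FIELD_DECL for x;
     struct B { float x; int y; };    returns NULL_TREE.
   This lets sh_gimplify_va_arg_expr below treat struct A like a plain
   float.  */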
8537 /* Implement `va_arg'. */
8538 static tree
8539 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
8540 gimple_seq *post_p ATTRIBUTE_UNUSED)
8542 HOST_WIDE_INT size, rsize;
8543 tree tmp, pptr_type_node;
8544 tree addr, lab_over = NULL, result = NULL;
8545 bool pass_by_ref;
8546 tree eff_type;
8548 if (!VOID_TYPE_P (type))
8549 pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
8550 else
8551 pass_by_ref = false;
8553 if (pass_by_ref)
8554 type = build_pointer_type (type);
8556 size = int_size_in_bytes (type);
8557 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
8558 pptr_type_node = build_pointer_type (ptr_type_node);
8560 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
8561 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
8563 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
8564 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
8565 int pass_as_float;
8566 tree lab_false;
8567 tree member;
8569 f_next_o = TYPE_FIELDS (va_list_type_node);
8570 f_next_o_limit = DECL_CHAIN (f_next_o);
8571 f_next_fp = DECL_CHAIN (f_next_o_limit);
8572 f_next_fp_limit = DECL_CHAIN (f_next_fp);
8573 f_next_stack = DECL_CHAIN (f_next_fp_limit);
8575 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
8576 NULL_TREE);
8577 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
8578 valist, f_next_o_limit, NULL_TREE);
8579 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
8580 valist, f_next_fp, NULL_TREE);
8581 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
8582 valist, f_next_fp_limit, NULL_TREE);
8583 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
8584 valist, f_next_stack, NULL_TREE);
8586 /* Structures with a single member with a distinct mode are passed
8587 like their member. This is relevant if the latter has a REAL_TYPE
8588 or COMPLEX_TYPE type. */
8589 eff_type = type;
8590 while (TREE_CODE (eff_type) == RECORD_TYPE
8591 && (member = find_sole_member (eff_type))
8592 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
8593 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
8594 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
8596 tree field_type = TREE_TYPE (member);
8598 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
8599 eff_type = field_type;
8600 else
8602 gcc_assert ((TYPE_ALIGN (eff_type)
8603 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
8604 || (TYPE_ALIGN (eff_type)
8605 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
8606 break;
8610 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
8612 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
8613 || (TREE_CODE (eff_type) == COMPLEX_TYPE
8614 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
8615 && size <= 16));
8617 else
8619 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
8622 addr = create_tmp_var (pptr_type_node, NULL);
8623 lab_false = create_artificial_label (UNKNOWN_LOCATION);
8624 lab_over = create_artificial_label (UNKNOWN_LOCATION);
8626 valist = build_simple_mem_ref (addr);
8628 if (pass_as_float)
8630 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
8631 tree cmp;
8632 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
8634 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
8635 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8637 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
8638 tmp = next_fp_limit;
8639 if (size > 4 && !is_double)
8640 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
8641 tmp = build2 (GE_EXPR, boolean_type_node,
8642 unshare_expr (next_fp_tmp), unshare_expr (tmp));
8643 cmp = build3 (COND_EXPR, void_type_node, tmp,
8644 build1 (GOTO_EXPR, void_type_node,
8645 unshare_expr (lab_false)), NULL_TREE);
8646 if (!is_double)
8647 gimplify_and_add (cmp, pre_p);
8649 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
8650 || (is_double || size == 16))
8652 tmp = fold_convert (sizetype, next_fp_tmp);
8653 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
8654 size_int (UNITS_PER_WORD));
8655 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
8656 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
8658 if (is_double)
8659 gimplify_and_add (cmp, pre_p);
8661 #ifdef FUNCTION_ARG_SCmode_WART
8662 if (TYPE_MODE (eff_type) == SCmode
8663 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
8665 tree subtype = TREE_TYPE (eff_type);
8666 tree real, imag;
8668 imag
8669 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8670 imag = get_initialized_tmp_var (imag, pre_p, NULL);
8672 real
8673 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
8674 real = get_initialized_tmp_var (real, pre_p, NULL);
8676 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
8677 if (type != eff_type)
8678 result = build1 (VIEW_CONVERT_EXPR, type, result);
8679 result = get_initialized_tmp_var (result, pre_p, NULL);
8681 #endif /* FUNCTION_ARG_SCmode_WART */
8683 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8684 gimplify_and_add (tmp, pre_p);
8686 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8687 gimplify_and_add (tmp, pre_p);
8689 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8690 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8691 gimplify_assign (unshare_expr (next_fp_tmp),
8692 unshare_expr (valist), pre_p);
8694 gimplify_assign (unshare_expr (valist),
8695 unshare_expr (next_fp_tmp), post_p);
8696 valist = next_fp_tmp;
8698 else
8700 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
8701 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
8702 unshare_expr (next_o_limit));
8703 tmp = build3 (COND_EXPR, void_type_node, tmp,
8704 build1 (GOTO_EXPR, void_type_node,
8705 unshare_expr (lab_false)),
8706 NULL_TREE);
8707 gimplify_and_add (tmp, pre_p);
8709 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
8710 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8712 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
8713 gimplify_and_add (tmp, pre_p);
8715 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
8716 gimplify_and_add (tmp, pre_p);
8718 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
8719 gimplify_assign (unshare_expr (next_o),
8720 unshare_expr (next_o_limit), pre_p);
8722 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
8723 gimplify_assign (unshare_expr (addr), tmp, pre_p);
8726 if (!result)
8728 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8729 gimplify_and_add (tmp, pre_p);
8733 /* ??? In va-sh.h, there had been code to make values larger than
8734 size 8 indirect. This does not match the FUNCTION_ARG macros. */
8736 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
8737 if (result)
8739 gimplify_assign (result, tmp, pre_p);
8740 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
8741 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
8742 gimplify_and_add (tmp, pre_p);
8744 else
8745 result = tmp;
8747 if (pass_by_ref)
8748 result = build_va_arg_indirect_ref (result);
8750 return result;
8753 /* 64 bit floating point memory transfers are paired single precision loads
8754 or stores.  So DWARF information needs fixing in little endian mode (unless
8755 PR=SZ=1 in FPSCR).  */
8757 sh_dwarf_register_span (rtx reg)
8759 unsigned regno = REGNO (reg);
8761 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
8762 return NULL_RTX;
8764 return
8765 gen_rtx_PARALLEL (VOIDmode,
8766 gen_rtvec (2,
8767 gen_rtx_REG (SFmode, regno + 1),
8768 gen_rtx_REG (SFmode, regno)));
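/* For illustration: for a DFmode value in the register pair starting at
   REGNO on a little endian target, the PARALLEL above describes it to the
   DWARF consumer as the two SFmode halves (REGNO + 1, REGNO) rather than
   as one 64-bit register.  */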
8771 static enum machine_mode
8772 sh_promote_function_mode (const_tree type, enum machine_mode mode,
8773 int *punsignedp, const_tree funtype,
8774 int for_return)
8776 if (sh_promote_prototypes (funtype))
8777 return promote_mode (type, mode, punsignedp);
8778 else
8779 return default_promote_function_mode (type, mode, punsignedp, funtype,
8780 for_return);
8783 static bool
8784 sh_promote_prototypes (const_tree type)
8786 if (TARGET_HITACHI)
8787 return false;
8788 if (! type)
8789 return true;
8790 return ! sh_attr_renesas_p (type);
8793 /* Whether an argument must be passed by reference. On SHcompact, we
8794 pretend arguments wider than 32 bits that would have been passed in
8795 registers are passed by reference, so that an SHmedia trampoline
8796 loads them into the full 64-bit registers.  */
8797 static int
8798 shcompact_byref (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
8799 const_tree type, bool named)
8801 unsigned HOST_WIDE_INT size;
8803 if (type)
8804 size = int_size_in_bytes (type);
8805 else
8806 size = GET_MODE_SIZE (mode);
8808 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
8809 && (!named
8810 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
8811 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
8812 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
8813 && size > 4
8814 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
8815 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8816 return size;
8817 else
8818 return 0;
8821 static bool
8822 sh_pass_by_reference (cumulative_args_t cum_v, enum machine_mode mode,
8823 const_tree type, bool named)
8825 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8827 if (targetm.calls.must_pass_in_stack (mode, type))
8828 return true;
8830 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
8831 wants to know about pass-by-reference semantics for incoming
8832 arguments. */
8833 if (! cum)
8834 return false;
8836 if (TARGET_SHCOMPACT)
8838 cum->byref = shcompact_byref (cum, mode, type, named);
8839 return cum->byref != 0;
8842 return false;
8845 static bool
8846 sh_callee_copies (cumulative_args_t cum, enum machine_mode mode,
8847 const_tree type, bool named ATTRIBUTE_UNUSED)
8849 /* ??? How can it possibly be correct to return true only on the
8850 caller side of the equation? Is there someplace else in the
8851 sh backend that's magically producing the copies? */
8852 return (get_cumulative_args (cum)->outgoing
8853 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
8854 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
8857 /* Round a register number up to a proper boundary for an arg of mode
8858 MODE.
8859 The SH doesn't care about double alignment, so we only
8860 round doubles to even regs when explicitly asked to.  */
8861 static int
8862 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
8864 /* FIXME: This used to be a macro and has been copy-pasted into this
8865 function as-is.  Make this more readable.  */
8866 return
8867 (((TARGET_ALIGN_DOUBLE
8868 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
8869 && (mode == DFmode || mode == DCmode)
8870 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
8871 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
8872 ? (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]
8873 + (cum.arg_count[(int) GET_SH_ARG_CLASS (mode)] & 1))
8874 : cum.arg_count[(int) GET_SH_ARG_CLASS (mode)]);
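/* Illustrative behaviour (hypothetical counts): when a DFmode argument must
   be pair-aligned (e.g. double-precision args on SH4), an odd running count
   such as 3 is rounded up to 4 before the register number is formed; for
   SImode, or when no alignment is required, the count is used unchanged.  */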
8877 /* Return true if an arg of the specified mode should be passed in a register
8878 or false otherwise. */
8879 static bool
8880 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
8881 const_tree type)
8883 /* FIXME: This used to be a macro and has been copy-pasted into this
8884 function as-is.  Make this more readable.  */
8885 return
8886 ((type == 0
8887 || (! TREE_ADDRESSABLE (type)
8888 && (! (TARGET_HITACHI || cum.renesas_abi)
8889 || ! (AGGREGATE_TYPE_P (type)
8890 || (!TARGET_FPU_ANY
8891 && (GET_MODE_CLASS (mode) == MODE_FLOAT
8892 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
8893 && ! cum.force_mem
8894 && (TARGET_SH2E
8895 ? ((mode) == BLKmode
8896 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
8897 + int_size_in_bytes (type))
8898 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
8899 : ((sh_round_reg (cum, mode)
8900 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
8901 <= NPARM_REGS (mode)))
8902 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
8905 static int
8906 sh_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
8907 tree type, bool named ATTRIBUTE_UNUSED)
8909 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8910 int words = 0;
8912 if (!TARGET_SH5
8913 && sh_pass_in_reg_p (*cum, mode, type)
8914 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
8915 && (sh_round_reg (*cum, mode)
8916 + (mode != BLKmode
8917 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8918 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8919 > NPARM_REGS (mode)))
8920 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8922 else if (!TARGET_SHCOMPACT
8923 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
8924 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
8926 return words * UNITS_PER_WORD;
8930 /* Define where to put the arguments to a function.
8931 Value is zero to push the argument on the stack,
8932 or a hard register in which to store the argument.
8934 MODE is the argument's machine mode.
8935 TYPE is the data type of the argument (as a tree).
8936 This is null for libcalls where that information may
8937 not be available.
8938 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8939 the preceding args and about the function being called.
8940 NAMED is nonzero if this argument is a named parameter
8941 (otherwise it is an extra parameter matching an ellipsis).
8943 On SH the first args are normally in registers
8944 and the rest are pushed. Any arg that starts within the first
8945 NPARM_REGS words is at least partially passed in a register unless
8946 its data type forbids. */
8947 static rtx
8948 sh_function_arg (cumulative_args_t ca_v, enum machine_mode mode,
8949 const_tree type, bool named)
8951 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8953 if (! TARGET_SH5 && mode == VOIDmode)
8954 return GEN_INT (ca->renesas_abi ? 1 : 0);
8956 if (! TARGET_SH5
8957 && sh_pass_in_reg_p (*ca, mode, type)
8958 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8960 int regno;
8962 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8963 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8965 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8966 gen_rtx_REG (SFmode,
8967 BASE_ARG_REG (mode)
8968 + (sh_round_reg (*ca, mode) ^ 1)),
8969 const0_rtx);
8970 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8971 gen_rtx_REG (SFmode,
8972 BASE_ARG_REG (mode)
8973 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8974 GEN_INT (4));
8975 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8978 /* If the alignment of a DF value causes an SF register to be
8979 skipped, we will use that skipped register for the next SF
8980 value. */
8981 if ((TARGET_HITACHI || ca->renesas_abi)
8982 && ca->free_single_fp_reg
8983 && mode == SFmode)
8984 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8986 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8987 ^ (mode == SFmode && TARGET_SH4
8988 && TARGET_LITTLE_ENDIAN
8989 && ! TARGET_HITACHI && ! ca->renesas_abi);
8990 return gen_rtx_REG (mode, regno);
8994 if (TARGET_SH5)
8996 if (mode == VOIDmode && TARGET_SHCOMPACT)
8997 return GEN_INT (ca->call_cookie);
8999 /* The following test assumes unnamed arguments are promoted to
9000 DFmode. */
9001 if (mode == SFmode && ca->free_single_fp_reg)
9002 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
9004 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
9005 && (named || ! ca->prototype_p)
9006 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
9008 if (! ca->prototype_p && TARGET_SHMEDIA)
9009 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
9011 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
9012 FIRST_FP_PARM_REG
9013 + ca->arg_count[(int) SH_ARG_FLOAT]);
9016 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
9017 && (! TARGET_SHCOMPACT
9018 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
9019 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
9020 type, named))))
9022 return gen_rtx_REG (mode, (FIRST_PARM_REG
9023 + ca->arg_count[(int) SH_ARG_INT]));
9026 return NULL_RTX;
9029 return NULL_RTX;
9032 /* Update the data in CUM to advance over an argument
9033 of mode MODE and data type TYPE.
9034 (TYPE is null for libcalls where that information may not be
9035 available.) */
9036 static void
9037 sh_function_arg_advance (cumulative_args_t ca_v, enum machine_mode mode,
9038 const_tree type, bool named)
9040 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9042 if (ca->force_mem)
9043 ca->force_mem = 0;
9044 else if (TARGET_SH5)
9046 const_tree type2 = (ca->byref && type
9047 ? TREE_TYPE (type)
9048 : type);
9049 enum machine_mode mode2 = (ca->byref && type
9050 ? TYPE_MODE (type2)
9051 : mode);
9052 int dwords = ((ca->byref
9053 ? ca->byref
9054 : mode2 == BLKmode
9055 ? int_size_in_bytes (type2)
9056 : GET_MODE_SIZE (mode2)) + 7) / 8;
9057 int numregs = MIN (dwords, NPARM_REGS (SImode)
9058 - ca->arg_count[(int) SH_ARG_INT]);
9060 if (numregs)
9062 ca->arg_count[(int) SH_ARG_INT] += numregs;
9063 if (TARGET_SHCOMPACT
9064 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
9066 ca->call_cookie
9067 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9068 - numregs, 1);
9069 /* N.B. We want this also for outgoing. */
9070 ca->stack_regs += numregs;
9072 else if (ca->byref)
9074 if (! ca->outgoing)
9075 ca->stack_regs += numregs;
9076 ca->byref_regs += numregs;
9077 ca->byref = 0;
9079 ca->call_cookie
9080 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9081 - numregs, 2);
9082 while (--numregs);
9083 ca->call_cookie
9084 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
9085 - 1, 1);
9087 else if (dwords > numregs)
9089 int pushregs = numregs;
9091 if (TARGET_SHCOMPACT)
9092 ca->stack_regs += numregs;
9093 while (pushregs < NPARM_REGS (SImode) - 1
9094 && (CALL_COOKIE_INT_REG_GET
9095 (ca->call_cookie,
9096 NPARM_REGS (SImode) - pushregs)
9097 == 1))
9099 ca->call_cookie
9100 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
9101 - pushregs, 1);
9102 pushregs++;
9104 if (numregs == NPARM_REGS (SImode))
9105 ca->call_cookie
9106 |= CALL_COOKIE_INT_REG (0, 1)
9107 | CALL_COOKIE_STACKSEQ (numregs - 1);
9108 else
9109 ca->call_cookie
9110 |= CALL_COOKIE_STACKSEQ (numregs);
9113 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
9114 && (named || ! ca->prototype_p))
9116 if (mode2 == SFmode && ca->free_single_fp_reg)
9117 ca->free_single_fp_reg = 0;
9118 else if (ca->arg_count[(int) SH_ARG_FLOAT]
9119 < NPARM_REGS (SFmode))
9121 int numfpregs
9122 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
9123 NPARM_REGS (SFmode)
9124 - ca->arg_count[(int) SH_ARG_FLOAT]);
9126 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
9128 if (TARGET_SHCOMPACT && ! ca->prototype_p)
9130 if (ca->outgoing && numregs > 0)
9133 ca->call_cookie
9134 |= (CALL_COOKIE_INT_REG
9135 (ca->arg_count[(int) SH_ARG_INT]
9136 - numregs + ((numfpregs - 2) / 2),
9137 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
9138 - numfpregs) / 2));
9140 while (numfpregs -= 2);
9142 else if (mode2 == SFmode && (named)
9143 && (ca->arg_count[(int) SH_ARG_FLOAT]
9144 < NPARM_REGS (SFmode)))
9145 ca->free_single_fp_reg
9146 = FIRST_FP_PARM_REG - numfpregs
9147 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
9150 return;
9153 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
9155 /* Note that we've used the skipped register. */
9156 if (mode == SFmode && ca->free_single_fp_reg)
9158 ca->free_single_fp_reg = 0;
9159 return;
9161 /* When we have a DF after an SF, there's an SF register that gets
9162 skipped in order to align the DF value. We note this skipped
9163 register, because the next SF value will use it, and not the
9164 SF that follows the DF. */
9165 if (mode == DFmode
9166 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
9168 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
9169 + BASE_ARG_REG (mode));
9173 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
9174 || sh_pass_in_reg_p (*ca, mode, type))
9175 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
9176 = (sh_round_reg (*ca, mode)
9177 + (mode == BLKmode
9178 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9179 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
9182 /* The Renesas calling convention doesn't quite fit into this scheme since
9183 the address is passed like an invisible argument, but one that is always
9184 passed in memory. */
9185 static rtx
9186 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
9188 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9189 return NULL_RTX;
9190 return gen_rtx_REG (Pmode, 2);
9193 /* Worker function for TARGET_FUNCTION_VALUE.
9195 For the SH, this is like LIBCALL_VALUE, except that we must change the
9196 mode like PROMOTE_MODE does.
9197 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
9198 tested here has to be kept in sync with the one in
9199 explow.c:promote_mode. */
9200 static rtx
9201 sh_function_value (const_tree valtype,
9202 const_tree fn_decl_or_type,
9203 bool outgoing ATTRIBUTE_UNUSED)
9205 if (fn_decl_or_type
9206 && !DECL_P (fn_decl_or_type))
9207 fn_decl_or_type = NULL;
9209 return gen_rtx_REG (
9210 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
9211 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
9212 && (TREE_CODE (valtype) == INTEGER_TYPE
9213 || TREE_CODE (valtype) == ENUMERAL_TYPE
9214 || TREE_CODE (valtype) == BOOLEAN_TYPE
9215 || TREE_CODE (valtype) == REAL_TYPE
9216 || TREE_CODE (valtype) == OFFSET_TYPE))
9217 && sh_promote_prototypes (fn_decl_or_type)
9218 ? (TARGET_SHMEDIA64 ? DImode : SImode) : TYPE_MODE (valtype)),
9219 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
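/* For illustration: a function returning 'short' or 'char' has
   GET_MODE_SIZE < 4, so when prototypes are promoted its value comes back
   in an SImode copy of the return register (DImode on SHmedia64) rather
   than in the narrow mode.  */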
9222 /* Worker function for TARGET_LIBCALL_VALUE. */
9223 static rtx
9224 sh_libcall_value (enum machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
9226 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
9229 /* Return true if N is a possible register number of function value. */
9230 static bool
9231 sh_function_value_regno_p (const unsigned int regno)
9233 return ((regno) == FIRST_RET_REG
9234 || (TARGET_SH2E && (regno) == FIRST_FP_RET_REG)
9235 || (TARGET_SHMEDIA_FPU && (regno) == FIRST_FP_RET_REG));
9238 /* Worker function for TARGET_RETURN_IN_MEMORY. */
9239 static bool
9240 sh_return_in_memory (const_tree type, const_tree fndecl)
9242 if (TARGET_SH5)
9244 if (TYPE_MODE (type) == BLKmode)
9245 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
9246 else
9247 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
9249 else
9251 return (TYPE_MODE (type) == BLKmode
9252 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
9253 && TREE_CODE (type) == RECORD_TYPE));
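/* For illustration: on SH5 an aggregate larger than 8 bytes is returned in
   memory while an 8-byte one still comes back in registers; on the other
   subtargets any BLKmode value, and with the Renesas ABI any struct, is
   returned in memory.  */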
9257 /* We actually emit the code in sh_expand_prologue. We used to use
9258 a static variable to flag that we need to emit this code, but that
9259 doesn't work when inlining, when functions are deferred and then emitted
9260 later. Fortunately, we already have two flags that are part of struct
9261 function that tell if a function uses varargs or stdarg. */
9262 static void
9263 sh_setup_incoming_varargs (cumulative_args_t ca,
9264 enum machine_mode mode,
9265 tree type,
9266 int *pretend_arg_size,
9267 int second_time ATTRIBUTE_UNUSED)
9269 gcc_assert (cfun->stdarg);
9270 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
9272 int named_parm_regs, anon_parm_regs;
9274 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
9275 + (mode == BLKmode
9276 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
9277 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
9278 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
9279 if (anon_parm_regs > 0)
9280 *pretend_arg_size = anon_parm_regs * 4;
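/* Illustrative example (hypothetical prototype): for 'int f (int a, ...)'
   the named argument occupies one register, so anon_parm_regs == 3 and
   12 bytes of pretend arguments are reserved, enough for the remaining
   three anonymous argument registers.  */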
9284 static bool
9285 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
9287 return TARGET_SH5;
9290 static bool
9291 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
9293 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
9295 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
9299 /* Define the offset between two registers, one to be eliminated, and
9300 the other its replacement, at the start of a routine. */
9301 int
9302 initial_elimination_offset (int from, int to)
9304 int regs_saved;
9305 int regs_saved_rounding = 0;
9306 int total_saved_regs_space;
9307 int total_auto_space;
9308 int save_flags = target_flags;
9309 int copy_flags;
9310 HARD_REG_SET live_regs_mask;
9312 shmedia_space_reserved_for_target_registers = false;
9313 regs_saved = calc_live_regs (&live_regs_mask);
9314 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
9316 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
9318 shmedia_space_reserved_for_target_registers = true;
9319 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
9322 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
9323 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
9324 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
9326 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
9327 copy_flags = target_flags;
9328 target_flags = save_flags;
9330 total_saved_regs_space = regs_saved + regs_saved_rounding;
9332 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9333 return total_saved_regs_space + total_auto_space
9334 + crtl->args.info.byref_regs * 8;
9336 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9337 return total_saved_regs_space + total_auto_space
9338 + crtl->args.info.byref_regs * 8;
9340 /* Initial gap between fp and sp is 0. */
9341 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9342 return 0;
9344 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
9345 return rounded_frame_size (0);
9347 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
9348 return rounded_frame_size (0);
9350 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
9351 && (to == HARD_FRAME_POINTER_REGNUM
9352 || to == STACK_POINTER_REGNUM));
9353 if (TARGET_SH5)
9355 int n = total_saved_regs_space;
9356 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
9357 save_schedule schedule;
9358 save_entry *entry;
9360 n += total_auto_space;
9362 /* If it wasn't saved, there's not much we can do. */
9363 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
9364 return n;
9366 target_flags = copy_flags;
9368 sh5_schedule_saves (&live_regs_mask, &schedule, n);
9369 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
9370 if (entry->reg == pr_reg)
9372 target_flags = save_flags;
9373 return entry->offset;
9375 gcc_unreachable ();
9377 else
9378 return total_auto_space;
9381 /* Parse the -mfixed-range= option string. */
9382 void
9383 sh_fix_range (const char *const_str)
9385 int i, first, last;
9386 char *str, *dash, *comma;
9388 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
9389 REG2 are either register names or register numbers. The effect
9390 of this option is to mark the registers in the range from REG1 to
9391 REG2 as ``fixed'' so they won't be used by the compiler. */
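     An illustrative example (not from the original sources): the option
     string "r10-r13" marks r10, r11, r12 and r13 as fixed, and several
     comma-separated ranges may be combined, e.g. "r8-r9,r12-r13".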
9393 i = strlen (const_str);
9394 str = (char *) alloca (i + 1);
9395 memcpy (str, const_str, i + 1);
9397 while (1)
9399 dash = strchr (str, '-');
9400 if (!dash)
9402 warning (0, "value of -mfixed-range must have form REG1-REG2");
9403 return;
9405 *dash = '\0';
9406 comma = strchr (dash + 1, ',');
9407 if (comma)
9408 *comma = '\0';
9410 first = decode_reg_name (str);
9411 if (first < 0)
9413 warning (0, "unknown register name: %s", str);
9414 return;
9417 last = decode_reg_name (dash + 1);
9418 if (last < 0)
9420 warning (0, "unknown register name: %s", dash + 1);
9421 return;
9424 *dash = '-';
9426 if (first > last)
9428 warning (0, "%s-%s is an empty range", str, dash + 1);
9429 return;
9432 for (i = first; i <= last; ++i)
9433 fixed_regs[i] = call_used_regs[i] = 1;
9435 if (!comma)
9436 break;
9438 *comma = ',';
9439 str = comma + 1;
9443 /* Insert any deferred function attributes from earlier pragmas. */
9444 static void
9445 sh_insert_attributes (tree node, tree *attributes)
9447 tree attrs;
9449 if (TREE_CODE (node) != FUNCTION_DECL)
9450 return;
9452 /* We are only interested in fields. */
9453 if (!DECL_P (node))
9454 return;
9456 /* Append the attributes to the deferred attributes. */
9457 *sh_deferred_function_attributes_tail = *attributes;
9458 attrs = sh_deferred_function_attributes;
9459 if (!attrs)
9460 return;
9462 /* Some attributes imply or require the interrupt attribute. */
9463 if (!lookup_attribute ("interrupt_handler", attrs)
9464 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
9466 /* If we have a trapa_handler, but no interrupt_handler attribute,
9467 insert an interrupt_handler attribute. */
9468 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
9469 /* We can't use sh_pr_interrupt here because that's not in the
9470 java frontend. */
9471 attrs
9472 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
9473 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
9474 if the interrupt attribute is missing, we ignore the attribute
9475 and warn. */
9476 else if (lookup_attribute ("sp_switch", attrs)
9477 || lookup_attribute ("trap_exit", attrs)
9478 || lookup_attribute ("nosave_low_regs", attrs)
9479 || lookup_attribute ("resbank", attrs))
9481 tree *tail;
9483 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
9485 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
9486 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
9487 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
9488 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
9489 warning (OPT_Wattributes,
9490 "%qE attribute only applies to interrupt functions",
9491 TREE_PURPOSE (attrs));
9492 else
9494 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
9495 NULL_TREE);
9496 tail = &TREE_CHAIN (*tail);
9499 attrs = *attributes;
9503 /* Install the processed list. */
9504 *attributes = attrs;
9506 /* Clear deferred attributes. */
9507 sh_deferred_function_attributes = NULL_TREE;
9508 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
9510 return;
9513 /*------------------------------------------------------------------------------
9514 Target specific attributes
9515 Supported attributes are:
9517 * interrupt_handler
9518 Specifies this function is an interrupt handler.
9520 * trapa_handler
9521 Like interrupt_handler, but don't save all registers.
9523 * sp_switch
9524 Specifies an alternate stack for an interrupt handler to run on.
9526 * trap_exit
9527 Use a trapa to exit an interrupt function instead of rte.
9529 * nosave_low_regs
9530 Don't save r0..r7 in an interrupt handler function.
9531 This is useful on SH3* and SH4*, which have a separate set of low
9532 regs for user and privileged modes.
9533 This is mainly to be used for non-reentrant interrupt handlers (i.e.
9534 those that run with interrupts disabled and thus can't be
9535 interrupted themselves).
9537 * renesas
9538 Use Renesas calling/layout conventions (functions and structures).
9540 * resbank
9541 In case of an interrupt handler function, use a register bank to
9542 save registers R0-R14, MACH, MACL, GBR and PR.
9543 This is available only on SH2A targets.
9545 * function_vector
9546 Declares a function to be called using the TBR relative addressing
9547 mode. Takes an argument that specifies the slot number in the table
9548 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
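/* An illustrative sketch (hypothetical declarations, not part of this file)
   of how the attributes listed above would appear in user code:

     void __attribute__ ((interrupt_handler)) irq_handler (void);
     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11))) timer_isr (void);
     void __attribute__ ((interrupt_handler, resbank)) fast_isr (void);
     void __attribute__ ((function_vector (18))) tbr_entry (void);
     int __attribute__ ((renesas)) mul2 (int a, int b);

   sp_switch names a global variable holding the alternate stack pointer,
   trap_exit and function_vector take integer constants (the latter limited
   to 0..255 by the handler below), and resbank is only accepted when
   compiling for SH2A.  */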
9551 /* Handle a 'resbank' attribute. */
9552 static tree
9553 sh_handle_resbank_handler_attribute (tree * node, tree name,
9554 tree args ATTRIBUTE_UNUSED,
9555 int flags ATTRIBUTE_UNUSED,
9556 bool * no_add_attrs)
9558 if (!TARGET_SH2A)
9560 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
9561 name);
9562 *no_add_attrs = true;
9564 if (TREE_CODE (*node) != FUNCTION_DECL)
9566 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9567 name);
9568 *no_add_attrs = true;
9571 return NULL_TREE;
9574 /* Handle an "interrupt_handler" attribute; arguments as in
9575 struct attribute_spec.handler. */
9576 static tree
9577 sh_handle_interrupt_handler_attribute (tree *node, tree name,
9578 tree args ATTRIBUTE_UNUSED,
9579 int flags ATTRIBUTE_UNUSED,
9580 bool *no_add_attrs)
9582 if (TREE_CODE (*node) != FUNCTION_DECL)
9584 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9585 name);
9586 *no_add_attrs = true;
9588 else if (TARGET_SHCOMPACT)
9590 error ("attribute interrupt_handler is not compatible with -m5-compact");
9591 *no_add_attrs = true;
9594 return NULL_TREE;
9597 /* Handle a 'function_vector' attribute; arguments as in
9598 struct attribute_spec.handler. */
9599 static tree
9600 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
9601 tree args ATTRIBUTE_UNUSED,
9602 int flags ATTRIBUTE_UNUSED,
9603 bool * no_add_attrs)
9605 if (!TARGET_SH2A)
9607 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
9608 name);
9609 *no_add_attrs = true;
9611 else if (TREE_CODE (*node) != FUNCTION_DECL)
9613 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9614 name);
9615 *no_add_attrs = true;
9617 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9619 /* The argument must be a constant integer. */
9620 warning (OPT_Wattributes,
9621 "%qE attribute argument not an integer constant",
9622 name);
9623 *no_add_attrs = true;
9625 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
9627 /* The argument value must be between 0 and 255. */
9628 warning (OPT_Wattributes,
9629 "%qE attribute argument should be between 0 to 255",
9630 name);
9631 *no_add_attrs = true;
9633 return NULL_TREE;
9636 /* Returns true if the given rtx X refers to a function that has been
9637 assigned the attribute 'function_vector'. */
9638 bool
9639 sh2a_is_function_vector_call (rtx x)
9641 if (GET_CODE (x) == SYMBOL_REF
9642 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9644 tree tr = SYMBOL_REF_DECL (x);
9646 if (sh2a_function_vector_p (tr))
9647 return true;
9650 return false;
9653 /* Returns the function vector number, if the attribute
9654 'function_vector' is assigned, otherwise returns zero. */
9655 int
9656 sh2a_get_function_vector_number (rtx x)
9658 int num;
9659 tree list, t;
9661 if ((GET_CODE (x) == SYMBOL_REF)
9662 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
9664 t = SYMBOL_REF_DECL (x);
9666 if (TREE_CODE (t) != FUNCTION_DECL)
9667 return 0;
9669 list = SH_ATTRIBUTES (t);
9670 while (list)
9672 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9674 num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
9675 return num;
9678 list = TREE_CHAIN (list);
9681 return 0;
9683 else
9684 return 0;
9687 /* Handle an "sp_switch" attribute; arguments as in
9688 struct attribute_spec.handler. */
9689 static tree
9690 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
9691 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9693 if (TREE_CODE (*node) != FUNCTION_DECL)
9695 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9696 name);
9697 *no_add_attrs = true;
9699 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
9701 /* The argument must be a constant string. */
9702 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
9703 name);
9704 *no_add_attrs = true;
9707 return NULL_TREE;
9710 /* Handle a "trap_exit" attribute; arguments as in
9711 struct attribute_spec.handler. */
9712 static tree
9713 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
9714 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
9716 if (TREE_CODE (*node) != FUNCTION_DECL)
9718 warning (OPT_Wattributes, "%qE attribute only applies to functions",
9719 name);
9720 *no_add_attrs = true;
9722 /* The argument specifies a trap number to be used in a trapa instruction
9723 at function exit (instead of an rte instruction). */
9724 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
9726 /* The argument must be a constant integer. */
9727 warning (OPT_Wattributes, "%qE attribute argument not an "
9728 "integer constant", name);
9729 *no_add_attrs = true;
9732 return NULL_TREE;
9735 static tree
9736 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
9737 tree name ATTRIBUTE_UNUSED,
9738 tree args ATTRIBUTE_UNUSED,
9739 int flags ATTRIBUTE_UNUSED,
9740 bool *no_add_attrs ATTRIBUTE_UNUSED)
9742 return NULL_TREE;
9745 /* True if __attribute__((renesas)) or -mrenesas. */
9746 bool
9747 sh_attr_renesas_p (const_tree td)
9749 if (TARGET_HITACHI)
9750 return true;
9751 if (td == NULL_TREE)
9752 return false;
9753 if (DECL_P (td))
9754 td = TREE_TYPE (td);
9755 if (td == error_mark_node)
9756 return false;
9757 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
9758 != NULL_TREE);
9761 /* True if __attribute__((renesas)) or -mrenesas, for the current
9762 function. */
9763 bool
9764 sh_cfun_attr_renesas_p (void)
9766 return sh_attr_renesas_p (current_function_decl);
9769 /* Returns true if the current function has the "interrupt_handler"
9770 attribute set. */
9771 bool
9772 sh_cfun_interrupt_handler_p (void)
9774 return (lookup_attribute ("interrupt_handler",
9775 DECL_ATTRIBUTES (current_function_decl))
9776 != NULL_TREE);
9779 /* Returns true if FUNC has been assigned the attribute
9780 "function_vector". */
9781 bool
9782 sh2a_function_vector_p (tree func)
9784 tree list;
9785 if (TREE_CODE (func) != FUNCTION_DECL)
9786 return false;
9788 list = SH_ATTRIBUTES (func);
9789 while (list)
9791 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
9792 return true;
9794 list = TREE_CHAIN (list);
9796 return false;
9799 /* Returns true if the current function has the "resbank" and "interrupt_handler" attributes set, and we are compiling for SH2A. */
9800 bool
9801 sh_cfun_resbank_handler_p (void)
9803 return ((lookup_attribute ("resbank",
9804 DECL_ATTRIBUTES (current_function_decl))
9805 != NULL_TREE)
9806 && (lookup_attribute ("interrupt_handler",
9807 DECL_ATTRIBUTES (current_function_decl))
9808 != NULL_TREE) && TARGET_SH2A);
9811 /* Returns true if the current function has a "trap_exit" attribute set. */
9812 bool
9813 sh_cfun_trap_exit_p (void)
9815 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
9816 != NULL_TREE;
9819 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
9820 static const char *
9821 sh_check_pch_target_flags (int old_flags)
9823 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
9824 | MASK_SH_E | MASK_HARD_SH4
9825 | MASK_FPU_SINGLE | MASK_SH4))
9826 return _("created and used with different architectures / ABIs");
9827 if ((old_flags ^ target_flags) & MASK_HITACHI)
9828 return _("created and used with different ABIs");
9829 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
9830 return _("created and used with different endianness");
9831 return NULL;
9834 /* Predicates used by the templates. */
9836 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
9837 Used only in general_movsrc_operand. */
9838 bool
9839 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9841 switch (REGNO (op))
9843 case PR_REG:
9844 case MACL_REG:
9845 case MACH_REG:
9846 return true;
9848 return false;
9851 /* Returns true if OP is a floating point value with value 0.0. */
9852 bool
9853 fp_zero_operand (rtx op)
9855 REAL_VALUE_TYPE r;
9857 if (GET_MODE (op) != SFmode)
9858 return false;
9860 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9861 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
9864 /* Returns true if OP is a floating point value with value 1.0. */
9865 bool
9866 fp_one_operand (rtx op)
9868 REAL_VALUE_TYPE r;
9870 if (GET_MODE (op) != SFmode)
9871 return false;
9873 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
9874 return REAL_VALUES_EQUAL (r, dconst1);
9877 /* In general mode switching is used. If we are
9878 compiling without -mfmovd, movsf_ie isn't taken into account for
9879 mode switching. We could check in machine_dependent_reorg for
9880 cases where we know we are in single precision mode, but there is
9881 no interface to find that out during reload, so we must avoid
9882 choosing an fldi alternative during reload and thus failing to
9883 allocate a scratch register for the constant loading. */
9884 bool
9885 fldi_ok (void)
9887 return true;
9890 /* Return the TLS type for TLS symbols. */
9891 enum tls_model
9892 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
9894 if (GET_CODE (op) != SYMBOL_REF)
9895 return TLS_MODEL_NONE;
9896 return SYMBOL_REF_TLS_MODEL (op);
9899 /* Return the destination address of a branch. */
9900 static int
9901 branch_dest (rtx branch)
9903 rtx dest = SET_SRC (PATTERN (branch));
9904 int dest_uid;
9906 if (GET_CODE (dest) == IF_THEN_ELSE)
9907 dest = XEXP (dest, 1);
9908 dest = XEXP (dest, 0);
9909 dest_uid = INSN_UID (dest);
9910 return INSN_ADDRESSES (dest_uid);
9913 /* Return nonzero if REG is not used after INSN.
9914 We assume REG is a reload reg, and therefore does
9915 not live past labels. It may live past calls or jumps though. */
9916 bool
9917 reg_unused_after (rtx reg, rtx_insn *insn)
9919 enum rtx_code code;
9920 rtx set;
9922 /* If the reg is set by this instruction, then it is safe for our
9923 case. Disregard the case where this is a store to memory, since
9924 we are checking a register used in the store address. */
9925 set = single_set (insn);
9926 if (set && !MEM_P (SET_DEST (set))
9927 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9928 return true;
9930 while ((insn = NEXT_INSN (insn)))
9932 rtx set;
9933 if (!INSN_P (insn))
9934 continue;
9936 code = GET_CODE (insn);
9938 #if 0
9939 /* If this is a label that existed before reload, then the register
9940 is dead here. However, if this is a label added by reorg, then
9941 the register may still be live here. We can't tell the difference,
9942 so we just ignore labels completely. */
9943 if (code == CODE_LABEL)
9944 return 1;
9945 /* else */
9946 #endif
9948 if (code == JUMP_INSN)
9949 return false;
9951 /* If this is a sequence, we must handle them all at once.
9952 We could have for instance a call that sets the target register,
9953 and an insn in a delay slot that uses the register. In this case,
9954 we must return 0. */
9955 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
9957 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
9958 int i;
9959 int retval = 0;
9961 for (i = 0; i < seq->len (); i++)
9963 rtx_insn *this_insn = seq->insn (i);
9964 rtx set = single_set (this_insn);
9966 if (CALL_P (this_insn))
9967 code = CALL_INSN;
9968 else if (JUMP_P (this_insn))
9970 if (INSN_ANNULLED_BRANCH_P (this_insn))
9971 return false;
9972 code = JUMP_INSN;
9975 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9976 return false;
9977 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9979 if (!MEM_P (SET_DEST (set)))
9980 retval = true;
9981 else
9982 return false;
9984 if (set == NULL_RTX
9985 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
9986 return false;
9988 if (retval == 1)
9989 return true;
9990 else if (code == JUMP_INSN)
9991 return false;
9994 set = single_set (insn);
9995 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
9996 return false;
9997 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
9998 return !MEM_P (SET_DEST (set));
9999 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
10000 return false;
10002 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
10003 return true;
10005 return true;
10008 #include "ggc.h"
10010 static GTY(()) rtx t_reg_rtx;
10011 rtx
10012 get_t_reg_rtx (void)
10014 if (! t_reg_rtx)
10015 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
10016 return t_reg_rtx;
10019 static GTY(()) rtx fpscr_rtx;
10020 rtx
10021 get_fpscr_rtx (void)
10023 if (! fpscr_rtx)
10025 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
10026 REG_USERVAR_P (fpscr_rtx) = 1;
10027 mark_user_reg (fpscr_rtx);
10029 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
10030 mark_user_reg (fpscr_rtx);
10031 return fpscr_rtx;
10034 static GTY(()) tree fpscr_values;
10036 static void
10037 emit_fpu_switch (rtx scratch, int index)
10039 rtx dst, src;
10041 if (fpscr_values == NULL)
10043 tree t;
10045 t = build_index_type (integer_one_node);
10046 t = build_array_type (integer_type_node, t);
10047 t = build_decl (BUILTINS_LOCATION,
10048 VAR_DECL, get_identifier ("__fpscr_values"), t);
10049 DECL_ARTIFICIAL (t) = 1;
10050 DECL_IGNORED_P (t) = 1;
10051 DECL_EXTERNAL (t) = 1;
10052 TREE_STATIC (t) = 1;
10053 TREE_PUBLIC (t) = 1;
10054 TREE_USED (t) = 1;
10056 fpscr_values = t;
10059 src = DECL_RTL (fpscr_values);
10060 if (!can_create_pseudo_p ())
10062 emit_move_insn (scratch, XEXP (src, 0));
10063 if (index != 0)
10064 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
10065 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
10067 else
10068 src = adjust_address (src, PSImode, index * 4);
10070 dst = get_fpscr_rtx ();
10071 emit_move_insn (dst, src);
10074 void
10075 emit_sf_insn (rtx pat)
10077 emit_insn (pat);
10080 void
10081 emit_df_insn (rtx pat)
10083 emit_insn (pat);
10086 void
10087 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10089 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10092 void
10093 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10095 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
10096 get_fpscr_rtx ()));
10099 void
10100 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
10102 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
10105 void
10106 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
10108 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
10109 get_fpscr_rtx ()));
10112 static rtx get_free_reg (HARD_REG_SET);
10114 /* This function returns a register to use to load the address to load
10115 the fpscr from. Currently it always returns r1 or r7, but when we are
10116 able to use pseudo registers after combine, or have a better mechanism
10117 for choosing a register, it should be done here. */
10118 /* REGS_LIVE is the liveness information for the point for which we
10119 need this allocation. In some bare-bones exit blocks, r1 is live at the
10120 start. We can even have all of r0..r3 being live:
10121 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
10122 INSN before which new insns are placed will clobber the register
10123 we return. If a basic block consists only of setting the return value
10124 register to a pseudo and using that register, the return value is not
10125 live before or after this block, yet we'll insert our insns right in
10126 the middle. */
10127 static rtx
10128 get_free_reg (HARD_REG_SET regs_live)
10130 if (! TEST_HARD_REG_BIT (regs_live, 1))
10131 return gen_rtx_REG (Pmode, 1);
10133 /* Hard reg 1 is live; since this is a small register classes target,
10134 there shouldn't be anything but a jump before the function end. */
10135 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
10136 return gen_rtx_REG (Pmode, 7);
10139 /* This function will set the fpscr from memory.
10140 MODE is the mode we are setting it to. */
10141 void
10142 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
10144 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
10145 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
10146 rtx addr_reg;
10148 addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
10149 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
10152 /* Is the given character a logical line separator for the assembler? */
10153 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
10154 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
10155 #endif
10157 static bool
10158 sequence_insn_p (rtx_insn *insn)
10160 rtx_insn *prev, *next;
10162 prev = PREV_INSN (insn);
10163 if (prev == NULL)
10164 return false;
10166 next = NEXT_INSN (prev);
10167 if (next == NULL)
10168 return false;
10170 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
10173 int
10174 sh_insn_length_adjustment (rtx_insn *insn)
10176 /* Instructions with unfilled delay slots take up an extra two bytes for
10177 the nop in the delay slot. */
10178 if (((NONJUMP_INSN_P (insn)
10179 && GET_CODE (PATTERN (insn)) != USE
10180 && GET_CODE (PATTERN (insn)) != CLOBBER)
10181 || CALL_P (insn) || JUMP_P (insn))
10182 && ! sequence_insn_p (insn)
10183 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
10184 return 2;
10186 /* SH2e has a bug that prevents the use of annulled branches, so if
10187 the delay slot is not filled, we'll have to put a NOP in it. */
10188 if (sh_cpu_attr == CPU_SH2E
10189 && JUMP_P (insn)
10190 && get_attr_type (insn) == TYPE_CBRANCH
10191 && ! sequence_insn_p (insn))
10192 return 2;
10194 /* sh-dsp parallel processing insns take four bytes instead of two. */
10196 if (NONJUMP_INSN_P (insn))
10198 int sum = 0;
10199 rtx body = PATTERN (insn);
10200 const char *templ;
10201 char c;
10202 bool maybe_label = true;
10204 if (GET_CODE (body) == ASM_INPUT)
10205 templ = XSTR (body, 0);
10206 else if (asm_noperands (body) >= 0)
10207 templ
10208 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
10209 else
10210 return 0;
10213 int ppi_adjust = 0;
10216 c = *templ++;
10217 while (c == ' ' || c == '\t');
10218 /* all sh-dsp parallel-processing insns start with p.
10219 The only non-ppi sh insn starting with p is pref.
10220 The only ppi starting with pr is prnd. */
10221 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
10222 ppi_adjust = 2;
10223 /* The repeat pseudo-insn expands to three insns, a total of
10224 six bytes in size. */
10225 else if ((c == 'r' || c == 'R')
10226 && ! strncasecmp ("epeat", templ, 5))
10227 ppi_adjust = 4;
10228 while (c && c != '\n'
10229 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
10231 /* If this is a label, it is obviously not a ppi insn. */
10232 if (c == ':' && maybe_label)
10234 ppi_adjust = 0;
10235 break;
10237 else if (c == '\'' || c == '"')
10238 maybe_label = false;
10239 c = *templ++;
10241 sum += ppi_adjust;
10242 maybe_label = c != ':';
10244 while (c);
10245 return sum;
10247 return 0;
10250 /* Return TRUE for a valid displacement for the REG+disp addressing
10251 with MODE. */
10252 bool
10253 sh_legitimate_index_p (enum machine_mode mode, rtx op, bool consider_sh2a,
10254 bool allow_zero)
10256 if (! CONST_INT_P (op))
10257 return false;
10259 if (TARGET_SHMEDIA)
10261 int size;
10263 /* Check if this is the address of an unaligned load / store. */
10264 if (mode == VOIDmode)
10265 return satisfies_constraint_I06 (op);
10267 size = GET_MODE_SIZE (mode);
10268 return (!(INTVAL (op) & (size - 1))
10269 && INTVAL (op) >= -512 * size
10270 && INTVAL (op) < 512 * size);
10272 else
10274 const HOST_WIDE_INT offset = INTVAL (op);
10275 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
10276 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
10278 /* If the mode does not support any displacement, always return false.
10279 Even though an index of '0' is actually always valid, it will cause
10280 troubles when e.g. a DFmode move is split into two SFmode moves,
10281 where one SFmode move will have index '0' and the other move will
10282 have index '4'. */
10283 if (!allow_zero && max_disp < 1)
10284 return false;
10286 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
10290 /* Recognize an RTL expression that is a valid memory address for
10291 an instruction.
10292 The MODE argument is the machine mode for the MEM expression
10293 that wants to use this address.
10294 Allow REG
10295 REG+disp
10296 REG+r0
10297 REG++
10298 --REG
10300 GBR+disp */
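/* For illustration (assembler syntax, not from the original sources), these
   forms correspond to SH operands roughly as follows:
     REG       ->  @rn            REG+disp  ->  @(disp,rn)
     REG+r0    ->  @(r0,rn)       REG++     ->  @rn+
     --REG     ->  @-rn           GBR+disp  ->  @(disp,gbr)  */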
10301 static bool
10302 sh_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
10304 if (! ALLOW_INDEXED_ADDRESS
10305 && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
10306 return false;
10308 if (REG_P (x) && REGNO (x) == GBR_REG)
10309 return true;
10311 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
10312 return true;
10313 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
10314 && ! TARGET_SHMEDIA
10315 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
10316 return true;
10317 else if (GET_CODE (x) == PLUS
10318 && (mode != PSImode || reload_completed))
10320 rtx xop0 = XEXP (x, 0);
10321 rtx xop1 = XEXP (x, 1);
10323 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
10324 return gbr_displacement (xop1, mode);
10326 if (GET_MODE_SIZE (mode) <= 8
10327 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
10328 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
10329 return true;
10331 if ((ALLOW_INDEXED_ADDRESS || GET_MODE (x) == DImode
10332 || ((xop0 == stack_pointer_rtx
10333 || xop0 == hard_frame_pointer_rtx)
10334 && REG_P (xop1) && REGNO (xop1) == R0_REG)
10335 || ((xop1 == stack_pointer_rtx
10336 || xop1 == hard_frame_pointer_rtx)
10337 && REG_P (xop0) && REGNO (xop0) == R0_REG))
10338 && ((!TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 4)
10339 || (TARGET_SHMEDIA && GET_MODE_SIZE (mode) <= 8)
10340 || ((TARGET_SH4 || TARGET_SH2A_DOUBLE)
10341 && TARGET_FMOVD && mode == DFmode)))
10343 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
10344 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
10345 return true;
10346 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
10347 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
10348 return true;
10352 return false;
10355 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
10356 isn't protected by a PIC unspec. */
10357 bool
10358 nonpic_symbol_mentioned_p (rtx x)
10360 const char *fmt;
10361 int i;
10363 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
10364 || GET_CODE (x) == PC)
10365 return true;
10367 /* We don't want to look into the possible MEM location of a
10368 CONST_DOUBLE, since we're not going to use it, in general. */
10369 if (GET_CODE (x) == CONST_DOUBLE)
10370 return false;
10372 if (GET_CODE (x) == UNSPEC
10373 && (XINT (x, 1) == UNSPEC_PIC
10374 || XINT (x, 1) == UNSPEC_GOT
10375 || XINT (x, 1) == UNSPEC_GOTOFF
10376 || XINT (x, 1) == UNSPEC_GOTPLT
10377 || XINT (x, 1) == UNSPEC_GOTTPOFF
10378 || XINT (x, 1) == UNSPEC_DTPOFF
10379 || XINT (x, 1) == UNSPEC_TPOFF
10380 || XINT (x, 1) == UNSPEC_PLT
10381 || XINT (x, 1) == UNSPEC_SYMOFF
10382 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF))
10383 return false;
10385 fmt = GET_RTX_FORMAT (GET_CODE (x));
10386 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10388 if (fmt[i] == 'E')
10390 int j;
10391 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10392 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
10393 return true;
10395 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
10396 return true;
10399 return false;
10402 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
10403 @GOTOFF in `reg'. */
10404 rtx
10405 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
10406 rtx reg)
10408 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
10409 return orig;
10411 if (GET_CODE (orig) == LABEL_REF
10412 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
10414 if (reg == NULL_RTX)
10415 reg = gen_reg_rtx (Pmode);
10417 emit_insn (gen_symGOTOFF2reg (reg, orig));
10418 return reg;
10420 else if (GET_CODE (orig) == SYMBOL_REF)
10422 if (reg == NULL_RTX)
10423 reg = gen_reg_rtx (Pmode);
10425 emit_insn (gen_symGOT2reg (reg, orig));
10426 return reg;
10428 return orig;
10431 /* Given a (logical) mode size and an offset in bytes, try to find the
10432 appropriate displacement value for a mov insn. On SH the displacements
10433 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
10434 15 bytes in QImode. To compensate this we create a new base address by
10435 adding an adjustment value to it.
10437 If the originally requested offset is greater than 127 we prefer using
10438 values 124..127 over 128..131 to increase opportunities to use the
10439 add #imm, Rn insn.
10441 In some cases it is possible that a requested offset might seem unaligned
10442 or inappropriate for the mode size, like offset = 2 and mode size = 4.
10443 This is compensated by adjusting the base address so that the effective
10444 address of the displacement move insn will be aligned.
10446 This is not the best possible way of rebasing the base address, as it
10447 does not look at other present displacement addressings around it.
10448 In some cases this can create more base address adjustments than would
10449 actually be necessary. */
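/* A worked example (illustrative only): for an SImode access at offset 68
   from a base register the maximum mov displacement is 60, so
   sh_find_mov_disp_adjust below yields offset_adjust = 64 and mov_disp = 4;
   i.e. 64 is added to the base address and the resulting move uses
   @(4,Rn).  */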
10450 struct disp_adjust
10452 rtx offset_adjust;
10453 rtx mov_disp;
10456 static struct disp_adjust
10457 sh_find_mov_disp_adjust (enum machine_mode mode, HOST_WIDE_INT offset)
10459 struct disp_adjust res = { NULL_RTX, NULL_RTX };
10461 /* Do not try to use SH2A's large displacements here, because this would
10462 effectively disable the small displacement insns. */
10463 const int mode_sz = GET_MODE_SIZE (mode);
10464 const int mov_insn_sz = mov_insn_size (mode, false);
10465 const int max_disp = sh_max_mov_insn_displacement (mode, false);
10466 const int max_disp_next = max_disp + mov_insn_sz;
10467 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
10468 HOST_WIDE_INT offset_adjust;
10470 /* In some cases this actually does happen and we must check for it. */
10471 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
10472 return res;
10474 /* Keeps the previous behavior for QImode displacement addressing.
10475 This just decides how the offset is re-based. Removing this special
10476 case will result in slightly bigger code on average, but it's not that
10477 bad actually. */
10478 if (mov_insn_sz == 1)
10479 align_modifier = 0;
10481 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
10483 if (mode_sz + offset - offset_adjust <= max_disp_next)
10485 res.offset_adjust = GEN_INT (offset_adjust);
10486 res.mov_disp = GEN_INT (offset - offset_adjust);
10489 return res;
10492 /* Try to modify an illegitimate address and make it legitimate.
10493 If we find one, return the new, valid address.
10494 Otherwise, return the original address. */
10495 static rtx
10496 sh_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
10498 if (flag_pic)
10499 x = legitimize_pic_address (oldx, mode, NULL_RTX);
10501 if (TARGET_SHMEDIA)
10502 return x;
10504 if (((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
10505 || (TARGET_SH2E && mode == SFmode))
10506 return x;
10508 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
10509 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
10511 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
10512 INTVAL (XEXP (x, 1)));
10514 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10516 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
10517 adj.offset_adjust, NULL_RTX, 0,
10518 OPTAB_LIB_WIDEN);
10519 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10523 return x;
10526 /* Attempt to replace *p, which is an address that needs reloading, with
10527 a valid memory address for an operand of mode MODE.
10528 Like for sh_legitimize_address, for the SH we try to get a normal form
10529 of the address. That will allow inheritance of the address reloads. */
10530 bool
10531 sh_legitimize_reload_address (rtx *p, enum machine_mode mode, int opnum,
10532 int itype)
10534 enum reload_type type = (enum reload_type) itype;
10535 const int mode_sz = GET_MODE_SIZE (mode);
10537 if (! ALLOW_INDEXED_ADDRESS
10538 && GET_CODE (*p) == PLUS
10539 && REG_P (XEXP (*p, 0)) && REG_P (XEXP (*p, 1)))
10541 *p = copy_rtx (*p);
10542 push_reload (*p, NULL_RTX, p, NULL,
10543 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10544 return true;
10547 if (! ALLOW_INDEXED_ADDRESS
10548 && GET_CODE (*p) == PLUS
10549 && GET_CODE (XEXP (*p, 0)) == PLUS)
10551 rtx sum = gen_rtx_PLUS (Pmode, XEXP (XEXP (*p, 0), 0),
10552 XEXP (XEXP (*p, 0), 1));
10553 *p = gen_rtx_PLUS (Pmode, sum, XEXP (*p, 1));
10554 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10555 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10556 return true;
10559 if (TARGET_SHMEDIA)
10560 return false;
10562 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
10563 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)
10564 && ! (mode == PSImode && type == RELOAD_FOR_INPUT_ADDRESS)
10565 && (ALLOW_INDEXED_ADDRESS
10566 || XEXP (*p, 0) == stack_pointer_rtx
10567 || XEXP (*p, 0) == hard_frame_pointer_rtx))
10569 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
10570 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
10572 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
10574 push_reload (*p, NULL_RTX, p, NULL,
10575 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10576 return true;
10579 if (TARGET_SH2E && mode == SFmode)
10581 *p = copy_rtx (*p);
10582 push_reload (*p, NULL_RTX, p, NULL,
10583 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10584 return true;
10587 /* FIXME: Do not allow legitimizing QImode and HImode displacement
10588 moves, because then reload has a problem figuring out the constraint
10589 that the move insn target/source reg must be R0.
10590 Or maybe some handling is wrong in sh_secondary_reload for this
10591 to work properly? */
10592 if ((mode_sz == 4 || mode_sz == 8)
10593 && ! (TARGET_SH4 && mode == DFmode)
10594 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
10596 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
10597 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
10598 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
10599 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10600 return true;
10604 /* We must re-recognize what we created before. */
10605 if (GET_CODE (*p) == PLUS
10606 && (mode_sz == 4 || mode_sz == 8)
10607 && GET_CODE (XEXP (*p, 0)) == PLUS
10608 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
10609 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
10610 && CONST_INT_P (XEXP (*p, 1))
10611 && ! (TARGET_SH2E && mode == SFmode))
10613 /* Because this address is so complex, we know it must have
10614 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
10615 it is already unshared, and needs no further unsharing. */
10616 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
10617 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
10618 return true;
10621 return false;
10624 /* In the name of slightly smaller debug output, and to cater to
10625 general assembler lossage, recognize various UNSPEC sequences
10626 and turn them back into a direct symbol reference. */
10627 static rtx
10628 sh_delegitimize_address (rtx orig_x)
10630 rtx x, y;
10632 orig_x = delegitimize_mem_from_attrs (orig_x);
10634 x = orig_x;
10635 if (MEM_P (x))
10636 x = XEXP (x, 0);
10637 if (GET_CODE (x) == CONST)
10639 y = XEXP (x, 0);
10640 if (GET_CODE (y) == UNSPEC)
10642 if (XINT (y, 1) == UNSPEC_GOT
10643 || XINT (y, 1) == UNSPEC_GOTOFF
10644 || XINT (y, 1) == UNSPEC_SYMOFF)
10645 return XVECEXP (y, 0, 0);
10646 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
10648 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
10650 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
10652 if (GET_CODE (symplt) == UNSPEC
10653 && XINT (symplt, 1) == UNSPEC_PLT)
10654 return XVECEXP (symplt, 0, 0);
10657 else if (TARGET_SHMEDIA
10658 && (XINT (y, 1) == UNSPEC_EXTRACT_S16
10659 || XINT (y, 1) == UNSPEC_EXTRACT_U16))
10661 rtx offset = XVECEXP (y, 0, 1);
10663 x = gen_rtx_PLUS (Pmode, XVECEXP (y, 0, 0), offset);
10664 if (MEM_P (orig_x))
10665 x = replace_equiv_address_nv (orig_x, x);
10666 return x;
10671 return orig_x;
10674 /* Mark the use of a constant in the literal table. If the constant
10675 has multiple labels, make it unique. */
10676 static rtx
10677 mark_constant_pool_use (rtx x)
10679 rtx_insn *insn, *lab;
10680 rtx pattern;
10682 if (x == NULL_RTX)
10683 return x;
10685 switch (GET_CODE (x))
10687 case LABEL_REF:
10688 x = XEXP (x, 0);
10689 case CODE_LABEL:
10690 break;
10691 default:
10692 return x;
10695 /* Get the first label in the list of labels for the same constant
10696 and delete the other labels in the list. */
10697 lab = as_a <rtx_insn *> (x);
10698 for (insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
10700 if (!LABEL_P (insn)
10701 || LABEL_REFS (insn) != NEXT_INSN (insn))
10702 break;
10703 lab = insn;
10706 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
10707 INSN_DELETED_P (insn) = 1;
10709 /* Mark constants in a window. */
10710 for (insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; insn = NEXT_INSN (insn))
10712 if (!NONJUMP_INSN_P (insn))
10713 continue;
10715 pattern = PATTERN (insn);
10716 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
10717 continue;
10719 switch (XINT (pattern, 1))
10721 case UNSPECV_CONST2:
10722 case UNSPECV_CONST4:
10723 case UNSPECV_CONST8:
10724 XVECEXP (pattern, 0, 1) = const1_rtx;
10725 break;
10726 case UNSPECV_WINDOW_END:
10727 if (XVECEXP (pattern, 0, 0) == x)
10728 return lab;
10729 break;
10730 case UNSPECV_CONST_END:
10731 return lab;
10732 default:
10733 break;
10737 return lab;
10740 /* Return true if it's possible to redirect BRANCH1 to the destination
10741 of an unconditional jump BRANCH2. We only want to do this if the
10742 resulting branch will have a short displacement. */
10743 bool
10744 sh_can_redirect_branch (rtx_insn *branch1, rtx_insn *branch2)
10746 if (flag_expensive_optimizations && simplejump_p (branch2))
10748 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
10749 rtx_insn *insn;
10750 int distance;
10752 for (distance = 0, insn = NEXT_INSN (branch1);
10753 insn && distance < 256;
10754 insn = PREV_INSN (insn))
10756 if (insn == dest)
10757 return true;
10758 else
10759 distance += get_attr_length (insn);
10761 for (distance = 0, insn = NEXT_INSN (branch1);
10762 insn && distance < 256;
10763 insn = NEXT_INSN (insn))
10765 if (insn == dest)
10766 return true;
10767 else
10768 distance += get_attr_length (insn);
10771 return false;
10774 /* Return nonzero if register old_reg can be renamed to register new_reg. */
10775 bool
10776 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
10777 unsigned int new_reg)
10779 /* Interrupt functions can only use registers that have already been
10780 saved by the prologue, even if they would normally be
10781 call-clobbered. */
10782 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
10783 return false;
10785 return true;
10788 /* Function to update the integer COST
10789 based on the relationship between INSN that is dependent on
10790 DEP_INSN through the dependence LINK. The default is to make no
10791 adjustment to COST. This can be used for example to specify to
10792 the scheduler that an output- or anti-dependence does not incur
10793 the same cost as a data-dependence. The return value should be
10794 the new value for COST. */
10795 static int
10796 sh_adjust_cost (rtx_insn *insn, rtx link ATTRIBUTE_UNUSED,
10797 rtx_insn *dep_insn, int cost)
10799 rtx reg, use_pat;
10801 if (TARGET_SHMEDIA)
10803 /* On SHmedia, if the dependence is an anti-dependence or
10804 output-dependence, there is no cost. */
10805 if (REG_NOTE_KIND (link) != 0)
10807 /* However, dependencies between target register loads and
10808 uses of the register in a subsequent block that are separated
10809 by a conditional branch are not modelled - we have to make do with
10810 the anti-dependency between the target register load and the
10811 conditional branch that ends the current block. */
10812 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
10813 && GET_CODE (PATTERN (dep_insn)) == SET
10814 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
10815 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
10816 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
10818 int orig_cost = cost;
10819 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
10820 rtx target = ((!note || XINT (note, 0) * 2 < REG_BR_PROB_BASE)
10821 ? insn : JUMP_LABEL (insn));
10822 /* On the likely path, the branch costs 1, on the unlikely path,
10823 it costs 3. */
10824 cost--;
10826 target = next_active_insn (target);
10827 while (target && ! flow_dependent_p (target, dep_insn)
10828 && --cost > 0);
10829 /* If two branches are executed in immediate succession, with the
10830 first branch properly predicted, this causes a stall at the
10831 second branch, hence we won't need the target for the
10832 second branch for two cycles after the launch of the first
10833 branch. */
10834 if (cost > orig_cost - 2)
10835 cost = orig_cost - 2;
10837 else
10838 cost = 0;
10841 else if (get_attr_is_mac_media (insn)
10842 && get_attr_is_mac_media (dep_insn))
10843 cost = 1;
10845 else if (! reload_completed
10846 && GET_CODE (PATTERN (insn)) == SET
10847 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
10848 && GET_CODE (PATTERN (dep_insn)) == SET
10849 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
10850 && cost < 4)
10851 cost = 4;
10852 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
10853 that is needed at the target. */
10854 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
10855 && ! flow_dependent_p (insn, dep_insn))
10856 cost--;
10858 else if (REG_NOTE_KIND (link) == 0)
10860 enum attr_type type;
10861 rtx dep_set;
10863 if (recog_memoized (insn) < 0
10864 || recog_memoized (dep_insn) < 0)
10865 return cost;
10867 dep_set = single_set (dep_insn);
10869 /* The latency that we specify in the scheduling description refers
10870 to the actual output, not to an auto-increment register; for that,
10871 the latency is one. */
10872 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
10874 rtx set = single_set (insn);
10876 if (set
10877 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
10878 && (!MEM_P (SET_DEST (set))
10879 || !reg_mentioned_p (SET_DEST (dep_set),
10880 XEXP (SET_DEST (set), 0))))
10881 cost = 1;
10883 /* The only input for a call that is timing-critical is the
10884 function's address. */
10885 if (CALL_P (insn))
10887 rtx call = get_call_rtx_from (insn);
10888 if (call
10889 /* sibcalli_thunk uses a symbol_ref in an unspec. */
10890 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
10891 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
10892 cost -= TARGET_SH4_300 ? 3 : 6;
10894 /* Likewise, the most timing critical input for an sfuncs call
10895 is the function address. However, sfuncs typically start
10896 using their arguments pretty quickly.
10897 Assume a four cycle delay for SH4 before they are needed.
10898 Cached ST40-300 calls are quicker, so assume only a one
10899 cycle delay there.
10900 ??? Maybe we should encode the delays till input registers
10901 are needed by sfuncs into the sfunc call insn. */
10902 /* All sfunc calls are parallels with at least four components.
10903 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
10904 else if (GET_CODE (PATTERN (insn)) == PARALLEL
10905 && XVECLEN (PATTERN (insn), 0) >= 4
10906 && (reg = sfunc_uses_reg (insn)))
10908 if (! reg_set_p (reg, dep_insn))
10909 cost -= TARGET_SH4_300 ? 1 : 4;
10911 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
10913 enum attr_type dep_type = get_attr_type (dep_insn);
10915 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
10916 cost--;
10917 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
10918 && (type = get_attr_type (insn)) != TYPE_CALL
10919 && type != TYPE_SFUNC)
10920 cost--;
10921 /* When the preceding instruction loads the shift amount of
10922 the following SHAD/SHLD, the latency of the load is increased
10923 by 1 cycle. */
10924 if (get_attr_type (insn) == TYPE_DYN_SHIFT
10925 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
10926 && reg_overlap_mentioned_p (SET_DEST (dep_set),
10927 XEXP (SET_SRC (single_set (insn)),
10928 1)))
10929 cost++;
10930 /* When an LS group instruction with a latency of less than
10931 3 cycles is followed by a double-precision floating-point
10932 instruction, FIPR, or FTRV, the latency of the first
10933 instruction is increased to 3 cycles. */
10934 else if (cost < 3
10935 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
10936 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
10937 cost = 3;
10938 /* The lsw register of a double-precision computation is ready one
10939 cycle earlier. */
10940 else if (reload_completed
10941 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
10942 && (use_pat = single_set (insn))
10943 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
10944 SET_SRC (use_pat)))
10945 cost -= 1;
10947 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
10948 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
10949 cost -= 1;
10951 else if (TARGET_SH4_300)
10953 /* Stores need their input register two cycles later. */
10954 if (dep_set && cost >= 1
10955 && ((type = get_attr_type (insn)) == TYPE_STORE
10956 || type == TYPE_PSTORE
10957 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
10959 rtx set = single_set (insn);
10961 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
10962 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
10964 cost -= 2;
10965 /* But don't reduce the cost below 1 if the address depends
10966 on a side effect of dep_insn. */
10967 if (cost < 1
10968 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
10969 cost = 1;
10974 /* An anti-dependence penalty of two applies if the first insn is a double
10975 precision fadd / fsub / fmul. */
10976 else if (!TARGET_SH4_300
10977 && REG_NOTE_KIND (link) == REG_DEP_ANTI
10978 && recog_memoized (dep_insn) >= 0
10979 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
10980 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
10981 /* A lot of alleged anti-flow dependences are fake,
10982 so check this one is real. */
10983 && flow_dependent_p (dep_insn, insn))
10984 cost = 2;
10986 return cost;
10989 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
10990 if DEP_INSN is anti-flow dependent on INSN. */
10991 static bool
10992 flow_dependent_p (rtx insn, rtx dep_insn)
10994 rtx tmp = PATTERN (insn);
10996 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
10997 return tmp == NULL_RTX;
11000 /* A helper function for flow_dependent_p called through note_stores. */
11001 static void
11002 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
11004 rtx * pinsn = (rtx *) data;
11006 if (*pinsn && reg_referenced_p (x, *pinsn))
11007 *pinsn = NULL_RTX;
11010 /* For use by sh_allocate_initial_value. Note that sh.md contains some
11011 'special function' patterns (type sfunc) that clobber pr, but that
11012 do not look like function calls to leaf_function_p. Hence we must
11013 do this extra check. */
11014 static int
11015 sh_pr_n_sets (void)
11017 return DF_REG_DEF_COUNT (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
11020 /* Return where to allocate pseudo for a given hard register initial
11021 value. */
11022 static rtx
11023 sh_allocate_initial_value (rtx hard_reg)
11025 rtx x;
11027 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
11029 if (crtl->is_leaf
11030 && ! sh_pr_n_sets ()
11031 && ! (TARGET_SHCOMPACT
11032 && ((crtl->args.info.call_cookie
11033 & ~ CALL_COOKIE_RET_TRAMP (1))
11034 || crtl->saves_all_registers)))
11035 x = hard_reg;
11036 else
11037 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
11039 else
11040 x = NULL_RTX;
11042 return x;
11045 /* This function returns "2" to indicate dual issue for the SH4
11046 processor. To be used by the DFA pipeline description. */
11047 static int
11048 sh_issue_rate (void)
11050 if (TARGET_SUPERSCALAR)
11051 return 2;
11052 else
11053 return 1;
11056 /* Functions for ready queue reordering for sched1. */
11058 /* Get weight for mode for a set x. */
11059 static short
11060 find_set_regmode_weight (rtx x, enum machine_mode mode)
11062 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
11063 return 1;
11064 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
11066 if (REG_P (SET_DEST (x)))
11068 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
11069 return 1;
11070 else
11071 return 0;
11073 return 1;
11075 return 0;
11078 /* Get regmode weight for insn. */
11079 static short
11080 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
11082 short reg_weight = 0;
11083 rtx x;
11085 /* Increment weight for each register born here. */
11086 x = PATTERN (insn);
11087 reg_weight += find_set_regmode_weight (x, mode);
11088 if (GET_CODE (x) == PARALLEL)
11090 int j;
11091 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
11093 x = XVECEXP (PATTERN (insn), 0, j);
11094 reg_weight += find_set_regmode_weight (x, mode);
11097 /* Decrement weight for each register that dies here. */
11098 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
11100 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
11102 rtx note = XEXP (x, 0);
11103 if (REG_P (note) && GET_MODE (note) == mode)
11104 reg_weight--;
11107 return reg_weight;
11110 /* Calculate regmode weights for all insns of a basic block. */
11111 static void
11112 find_regmode_weight (basic_block b, enum machine_mode mode)
11114 rtx_insn *insn, *next_tail, *head, *tail;
11116 get_ebb_head_tail (b, b, &head, &tail);
11117 next_tail = NEXT_INSN (tail);
11119 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
11121 /* Handle register life information. */
11122 if (!INSN_P (insn))
11123 continue;
11125 if (mode == SFmode)
11126 INSN_REGMODE_WEIGHT (insn, mode) =
11127 find_insn_regmode_weight (insn, mode)
11128 + 2 * find_insn_regmode_weight (insn, DFmode);
11129 else if (mode == SImode)
11130 INSN_REGMODE_WEIGHT (insn, mode) =
11131 find_insn_regmode_weight (insn, mode)
11132 + 2 * find_insn_regmode_weight (insn, DImode);
11136 /* Comparison function for ready queue sorting. */
11137 static int
11138 rank_for_reorder (const void *x, const void *y)
11140 rtx_insn *tmp = *(rtx_insn * const *) y;
11141 rtx_insn *tmp2 = *(rtx_insn * const *) x;
11143 /* The insn in a schedule group should be issued first. */
11144 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
11145 return SCHED_GROUP_P (tmp2) ? 1 : -1;
11147 /* If insns are equally good, sort by INSN_LUID (original insn order); this
11148 minimizes instruction movement, thus minimizing sched's effect on
11149 register pressure. */
11150 return INSN_LUID (tmp) - INSN_LUID (tmp2);
11153 /* Resort the array A in which only the element at index N may be out of order. */
11154 static void
11155 swap_reorder (rtx_insn **a, int n)
11157 rtx_insn *insn = a[n - 1];
11158 int i = n - 2;
11160 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
11162 a[i + 1] = a[i];
11163 i -= 1;
11165 a[i + 1] = insn;
11168 /* Sort the ready list by ascending priority. */
11169 static void
11170 ready_reorder (rtx_insn **ready, int nready)
11172 if (nready == 2)
11173 swap_reorder (ready, nready);
11174 else if (nready > 2)
11175 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
11178 /* Count life regions of r0 for a block. */
11179 static int
11180 find_r0_life_regions (basic_block b)
11182 rtx_insn *end, *insn;
11183 rtx pset;
11184 rtx r0_reg;
11185 int live;
11186 int set;
11187 int death = 0;
11189 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
11191 set = 1;
11192 live = 1;
11194 else
11196 set = 0;
11197 live = 0;
11200 insn = BB_HEAD (b);
11201 end = BB_END (b);
11202 r0_reg = gen_rtx_REG (SImode, R0_REG);
11203 while (1)
11205 if (INSN_P (insn))
11207 if (find_regno_note (insn, REG_DEAD, R0_REG))
11209 death++;
11210 live = 0;
11212 if (!live
11213 && (pset = single_set (insn))
11214 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
11215 && !find_regno_note (insn, REG_UNUSED, R0_REG))
11217 set++;
11218 live = 1;
11221 if (insn == end)
11222 break;
11223 insn = NEXT_INSN (insn);
11225 return set - death;
11228 /* Calculate regmode weights for all insns of all basic blocks. */
11229 static void
11230 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
11231 int verbose ATTRIBUTE_UNUSED,
11232 int old_max_uid)
11234 basic_block b;
11236 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
11237 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
11238 r0_life_regions = 0;
11240 FOR_EACH_BB_REVERSE_FN (b, cfun)
11242 find_regmode_weight (b, SImode);
11243 find_regmode_weight (b, SFmode);
11244 if (!reload_completed)
11245 r0_life_regions += find_r0_life_regions (b);
11248 CURR_REGMODE_PRESSURE (SImode) = 0;
11249 CURR_REGMODE_PRESSURE (SFmode) = 0;
11252 /* Cleanup. */
11253 static void
11254 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
11255 int verbose ATTRIBUTE_UNUSED)
11257 if (regmode_weight[0])
11259 free (regmode_weight[0]);
11260 regmode_weight[0] = NULL;
11262 if (regmode_weight[1])
11264 free (regmode_weight[1]);
11265 regmode_weight[1] = NULL;
11269 /* The set of scalar modes supported differs from the default in that
11270 TImode is not supported for 32-bit SHMEDIA. */
11271 static bool
11272 sh_scalar_mode_supported_p (enum machine_mode mode)
11274 if (TARGET_SHMEDIA32 && mode == TImode)
11275 return false;
11277 return default_scalar_mode_supported_p (mode);
11280 /* Cache the can_issue_more so that we can return it from reorder2. Also,
11281 keep count of register pressures on SImode and SFmode. */
11282 static int
11283 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
11284 int sched_verbose ATTRIBUTE_UNUSED,
11285 rtx_insn *insn,
11286 int can_issue_more)
11288 if (GET_CODE (PATTERN (insn)) != USE
11289 && GET_CODE (PATTERN (insn)) != CLOBBER)
11290 cached_can_issue_more = can_issue_more - 1;
11291 else
11292 cached_can_issue_more = can_issue_more;
11294 if (reload_completed)
11295 return cached_can_issue_more;
11297 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
11298 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
11300 return cached_can_issue_more;
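/* Reset the SImode / SFmode register pressure counters at the start of
   each scheduling region.  */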
11303 static void
11304 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
11305 int verbose ATTRIBUTE_UNUSED,
11306 int veclen ATTRIBUTE_UNUSED)
11308 CURR_REGMODE_PRESSURE (SImode) = 0;
11309 CURR_REGMODE_PRESSURE (SFmode) = 0;
11312 /* Some magic numbers. */
11313 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11314 functions that already have high pressure on r0. */
11315 #define R0_MAX_LIFE_REGIONS 2
11316 /* Register Pressure thresholds for SImode and SFmode registers. */
11317 #define SIMODE_MAX_WEIGHT 5
11318 #define SFMODE_MAX_WEIGHT 10
11320 /* Return true if the pressure is high for MODE. */
11321 static bool
11322 high_pressure (enum machine_mode mode)
11324 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
11325 functions that already have high pressure on r0. */
11326 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
11327 return true;
11329 if (mode == SFmode)
11330 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
11331 else
11332 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
11335 /* Reorder ready queue if register pressure is high. */
11336 static int
11337 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
11338 int sched_verbose ATTRIBUTE_UNUSED,
11339 rtx_insn **ready,
11340 int *n_readyp,
11341 int clock_var ATTRIBUTE_UNUSED)
11343 if (reload_completed)
11344 return sh_issue_rate ();
11346 if (high_pressure (SFmode) || high_pressure (SImode))
11348 ready_reorder (ready, *n_readyp);
11351 return sh_issue_rate ();
11354 /* Skip cycles if the current register pressure is high. */
11355 static int
11356 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
11357 int sched_verbose ATTRIBUTE_UNUSED,
11358 rtx_insn **ready ATTRIBUTE_UNUSED,
11359 int *n_readyp ATTRIBUTE_UNUSED,
11360 int clock_var ATTRIBUTE_UNUSED)
11362 if (reload_completed)
11363 return cached_can_issue_more;
11365 if (high_pressure(SFmode) || high_pressure (SImode))
11366 skip_cycles = 1;
11368 return cached_can_issue_more;
11371 /* Skip cycles without sorting the ready queue. This will move insns from
11372 Q->R. If this is the last cycle we are skipping, allow sorting of the
11373 ready queue by sh_reorder. */
11375 /* Generally, skipping this many cycles is sufficient for all insns to move
11376 from Q -> R. */
11377 #define MAX_SKIPS 8
11379 static int
11380 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
11381 int sched_verbose ATTRIBUTE_UNUSED,
11382 rtx_insn *insn ATTRIBUTE_UNUSED,
11383 int last_clock_var,
11384 int clock_var,
11385 int *sort_p)
11387 if (reload_completed)
11388 return 0;
11390 if (skip_cycles)
11392 if ((clock_var - last_clock_var) < MAX_SKIPS)
11394 *sort_p = 0;
11395 return 1;
11397 /* If this is the last cycle we are skipping, allow reordering of R. */
11398 if ((clock_var - last_clock_var) == MAX_SKIPS)
11400 *sort_p = 1;
11401 return 1;
11405 skip_cycles = 0;
11407 return 0;
11410 /* SHmedia requires registers for branches, so we can't generate new
11411 branches past reload. */
11412 static bool
11413 sh_cannot_modify_jumps_p (void)
11415 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
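/* Return the register class usable for branch target registers; only
   SHmedia has them, so this is NO_REGS on other targets.  */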
11418 static reg_class_t
11419 sh_target_reg_class (void)
11421 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
11424 static bool
11425 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
11427 if (! shmedia_space_reserved_for_target_registers)
11428 return 0;
11429 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
11430 return 0;
11432 HARD_REG_SET dummy;
11433 if (calc_live_regs (&dummy) >= 6 * 8)
11434 return 1;
11435 return 0;
11438 static bool
11439 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
11441 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
11444 /*
11445 On the SH1..SH4, the trampoline looks like
11446 2 0002 D202 mov.l l2,r2
11447 1 0000 D301 mov.l l1,r3
11448 3 0004 422B jmp @r2
11449 4 0006 0009 nop
11450 5 0008 00000000 l1: .long area
11451 6 000c 00000000 l2: .long function
11453 SH5 (compact) uses r1 instead of r3 for the static chain. */
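/* Note: the SH1..SH4 initialization code below stores these four
   instructions as two SImode words (0xd301d202 and 0x0009422b on
   little-endian targets), followed by the two .long operands.  */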
11456 /* Emit RTL insns to initialize the variable parts of a trampoline.
11457 FNADDR is an RTX for the address of the function's pure code.
11458 CXT is an RTX for the static chain value for the function. */
11459 static void
11460 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
11462 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
11463 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
11465 if (TARGET_SHMEDIA64)
11467 rtx tramp_templ;
11468 int fixed_len;
11470 rtx movi1 = GEN_INT (0xcc000010);
11471 rtx shori1 = GEN_INT (0xc8000010);
11472 rtx src, dst;
11474 /* The following trampoline works within a +- 128 KB range for cxt:
11475 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
11476 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
11477 gettr tr1,r1; blink tr0,r63 */
11478 /* Address rounding makes it hard to compute the exact bounds of the
11479 offset for this trampoline, but we have a rather generous offset
11480 range, so frame_offset should do fine as an upper bound. */
11481 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
11483 /* ??? could optimize this trampoline initialization
11484 by writing DImode words with two insns each. */
11485 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
11486 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
11487 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
11488 insn = gen_rtx_AND (DImode, insn, mask);
11489 /* OR in the ptb/u .,tr1 pattern. */
11490 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
11491 insn = force_operand (insn, NULL_RTX);
11492 insn = gen_lowpart (SImode, insn);
11493 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
11494 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
11495 insn = gen_rtx_AND (DImode, insn, mask);
11496 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
11497 insn = gen_lowpart (SImode, insn);
11498 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
11499 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
11500 insn = gen_rtx_AND (DImode, insn, mask);
11501 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11502 insn = gen_lowpart (SImode, insn);
11503 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
11504 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
11505 insn = gen_rtx_AND (DImode, insn, mask);
11506 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11507 insn = gen_lowpart (SImode, insn);
11508 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
11509 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
11510 insn = gen_rtx_AND (DImode, insn, mask);
11511 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
11512 insn = gen_lowpart (SImode, insn);
11513 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
11514 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
11515 GEN_INT (0x6bf10600));
11516 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
11517 GEN_INT (0x4415fc10));
11518 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
11519 GEN_INT (0x4401fff0));
11520 emit_insn (gen_ic_invalidate_line (tramp));
11521 return;
11523 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
11524 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
11526 tramp_templ = gen_datalabel_ref (tramp_templ);
11527 dst = tramp_mem;
11528 src = gen_const_mem (BLKmode, tramp_templ);
11529 set_mem_align (dst, 256);
11530 set_mem_align (src, 64);
11531 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
11533 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
11534 emit_move_insn (adjust_address (tramp_mem, Pmode,
11535 fixed_len + GET_MODE_SIZE (Pmode)),
11536 cxt);
11537 emit_insn (gen_ic_invalidate_line (tramp));
11538 return;
11540 else if (TARGET_SHMEDIA)
11542 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
11543 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
11544 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
11545 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
11546 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
11547 rotated 10 right, and the high 16 bits of every 32 selected. */
11548 rtx movishori
11549 = force_reg (V2HImode, (simplify_gen_subreg
11550 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
11551 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
11552 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
11554 fnaddr = force_reg (SImode, fnaddr);
11555 cxt = force_reg (SImode, cxt);
11556 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
11557 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
11558 movishori));
11559 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
11560 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11561 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
11562 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
11563 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
11564 gen_rtx_SUBREG (V2HImode, cxt, 0),
11565 movishori));
11566 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
11567 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
11568 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
11569 if (TARGET_LITTLE_ENDIAN)
11571 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
11572 emit_insn (gen_mextr4 (quad2, cxtload, blink));
11574 else
11576 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
11577 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
11579 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
11580 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
11581 emit_insn (gen_ic_invalidate_line (tramp));
11582 return;
11584 else if (TARGET_SHCOMPACT)
11586 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
11587 return;
11589 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
11590 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
11591 SImode));
11592 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
11593 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
11594 SImode));
11595 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
11596 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
11597 if (TARGET_HARD_SH4 || TARGET_SH5)
11599 if (!TARGET_INLINE_IC_INVALIDATE
11600 || (!(TARGET_SH4A_ARCH || TARGET_SH4_300) && TARGET_USERMODE))
11601 emit_library_call (function_symbol (NULL, "__ic_invalidate",
11602 FUNCTION_ORDINARY),
11603 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
11604 else
11605 emit_insn (gen_ic_invalidate_line (tramp));
11609 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
11610 static rtx
11611 sh_trampoline_adjust_address (rtx tramp)
11613 if (TARGET_SHMEDIA)
11614 tramp = expand_simple_binop (Pmode, PLUS, tramp, const1_rtx,
11615 gen_reg_rtx (Pmode), 0, OPTAB_LIB_WIDEN);
11616 return tramp;
11619 /* FIXME: This is overly conservative. A SHcompact function that
11620 receives arguments ``by reference'' will have them stored in its
11621 own stack frame, so it must not pass pointers or references to
11622 these arguments to other functions by means of sibling calls. */
11623 /* If PIC, we cannot make sibling calls to global functions
11624 because the PLT requires r12 to be live. */
11625 static bool
11626 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
11628 return (1
11629 && (! TARGET_SHCOMPACT
11630 || crtl->args.info.stack_regs == 0)
11631 && ! sh_cfun_interrupt_handler_p ()
11632 && (! flag_pic
11633 || (decl && ! TREE_PUBLIC (decl))
11634 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
11637 /* Machine specific built-in functions. */
11639 struct builtin_description
11641 bool (* const is_enabled) (void);
11642 const enum insn_code icode;
11643 const char *const name;
11644 int signature;
11645 tree fndecl;
11648 static bool
11649 shmedia_builtin_p (void)
11651 return TARGET_SHMEDIA;
11654 /* This function can be used if there are any built-ins that are not for
11655 SHmedia. It's commented out to avoid the defined-but-unused warning.
11656 static bool
11657 sh1_builtin_p (void)
11659 return TARGET_SH1;
11663 /* Describe number and signedness of arguments; arg[0] == result
11664 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
11665 /* 9: 64-bit pointer, 10: 32-bit pointer */
11666 static const char signature_args[][4] =
11668 #define SH_BLTIN_V2SI2 0
11669 { 4, 4 },
11670 #define SH_BLTIN_V4HI2 1
11671 { 4, 4 },
11672 #define SH_BLTIN_V2SI3 2
11673 { 4, 4, 4 },
11674 #define SH_BLTIN_V4HI3 3
11675 { 4, 4, 4 },
11676 #define SH_BLTIN_V8QI3 4
11677 { 4, 4, 4 },
11678 #define SH_BLTIN_MAC_HISI 5
11679 { 1, 4, 4, 1 },
11680 #define SH_BLTIN_SH_HI 6
11681 { 4, 4, 1 },
11682 #define SH_BLTIN_SH_SI 7
11683 { 4, 4, 1 },
11684 #define SH_BLTIN_V4HI2V2SI 8
11685 { 4, 4, 4 },
11686 #define SH_BLTIN_V4HI2V8QI 9
11687 { 4, 4, 4 },
11688 #define SH_BLTIN_SISF 10
11689 { 4, 2 },
11690 #define SH_BLTIN_LDUA_L 11
11691 { 2, 10 },
11692 #define SH_BLTIN_LDUA_Q 12
11693 { 1, 10 },
11694 #define SH_BLTIN_STUA_L 13
11695 { 0, 10, 2 },
11696 #define SH_BLTIN_STUA_Q 14
11697 { 0, 10, 1 },
11698 #define SH_BLTIN_LDUA_L64 15
11699 { 2, 9 },
11700 #define SH_BLTIN_LDUA_Q64 16
11701 { 1, 9 },
11702 #define SH_BLTIN_STUA_L64 17
11703 { 0, 9, 2 },
11704 #define SH_BLTIN_STUA_Q64 18
11705 { 0, 9, 1 },
11706 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
11707 #define SH_BLTIN_2 19
11708 #define SH_BLTIN_SU 19
11709 { 1, 2 },
11710 #define SH_BLTIN_3 20
11711 #define SH_BLTIN_SUS 20
11712 { 2, 2, 1 },
11713 #define SH_BLTIN_PSSV 21
11714 { 0, 8, 2, 2 },
11715 #define SH_BLTIN_XXUU 22
11716 #define SH_BLTIN_UUUU 22
11717 { 1, 1, 1, 1 },
11718 #define SH_BLTIN_PV 23
11719 { 0, 8 },
11720 #define SH_BLTIN_VP 24
11721 { 8, 0 },
11723 /* mcmv: operands considered unsigned. */
11724 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
11725 /* mperm: control value considered unsigned int. */
11726 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
11727 /* mshards_q: returns signed short. */
11728 /* nsb: takes long long arg, returns unsigned char. */
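/* For example, SH_BLTIN_SH_HI ({ 4, 4, 1 }) describes a builtin whose
   result and first argument take the insn operand's mode with signedness
   "don't care", and whose shift-count argument is unsigned.  */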
11729 static struct builtin_description bdesc[] =
11731 { shmedia_builtin_p,
11732 CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2, 0 },
11733 { shmedia_builtin_p,
11734 CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2, 0 },
11735 { shmedia_builtin_p,
11736 CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3, 0 },
11737 { shmedia_builtin_p,
11738 CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3, 0 },
11739 { shmedia_builtin_p,
11740 CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3, 0 },
11741 { shmedia_builtin_p,
11742 CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3, 0 },
11743 { shmedia_builtin_p,
11744 CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3, 0 },
11745 { shmedia_builtin_p,
11746 CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV, 0 },
11747 { shmedia_builtin_p,
11748 CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3, 0 },
11749 { shmedia_builtin_p,
11750 CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3, 0 },
11751 { shmedia_builtin_p,
11752 CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3, 0 },
11753 { shmedia_builtin_p,
11754 CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3, 0 },
11755 { shmedia_builtin_p,
11756 CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3, 0 },
11757 { shmedia_builtin_p,
11758 CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3, 0 },
11759 { shmedia_builtin_p,
11760 CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU, 0 },
11761 { shmedia_builtin_p,
11762 CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3, 0 },
11763 { shmedia_builtin_p,
11764 CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI, 0 },
11765 { shmedia_builtin_p,
11766 CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI, 0 },
11767 { shmedia_builtin_p,
11768 CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3, 0 },
11769 { shmedia_builtin_p,
11770 CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3, 0 },
11771 { shmedia_builtin_p,
11772 CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3, 0 },
11773 { shmedia_builtin_p,
11774 CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3, 0 },
11775 { shmedia_builtin_p,
11776 CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3, 0 },
11777 { shmedia_builtin_p,
11778 CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3, 0 },
11779 { shmedia_builtin_p,
11780 CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3, 0 },
11781 { shmedia_builtin_p,
11782 CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI, 0 },
11783 { shmedia_builtin_p,
11784 CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI, 0 },
11785 { shmedia_builtin_p,
11786 CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, 0 },
11787 { shmedia_builtin_p,
11788 CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3, 0 },
11789 { shmedia_builtin_p,
11790 CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3, 0 },
11791 { shmedia_builtin_p,
11792 CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3, 0 },
11793 { shmedia_builtin_p,
11794 CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3, 0 },
11795 { shmedia_builtin_p,
11796 CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI, 0 },
11797 { shmedia_builtin_p,
11798 CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI, 0 },
11799 { shmedia_builtin_p,
11800 CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU, 0 },
11801 { shmedia_builtin_p,
11802 CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI, 0 },
11803 { shmedia_builtin_p,
11804 CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU, 0 },
11805 { shmedia_builtin_p,
11806 CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI, 0 },
11807 { shmedia_builtin_p,
11808 CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI, 0 },
11809 { shmedia_builtin_p,
11810 CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI, 0 },
11811 { shmedia_builtin_p,
11812 CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI, 0 },
11813 { shmedia_builtin_p,
11814 CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS, 0 },
11815 { shmedia_builtin_p,
11816 CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3, 0 },
11817 { shmedia_builtin_p,
11818 CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3, 0 },
11819 { shmedia_builtin_p,
11820 CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3, 0 },
11821 { shmedia_builtin_p,
11822 CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3, 0 },
11823 { shmedia_builtin_p,
11824 CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3, 0 },
11825 { shmedia_builtin_p,
11826 CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3, 0 },
11827 { shmedia_builtin_p,
11828 CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI, 0 },
11829 { shmedia_builtin_p,
11830 CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI, 0 },
11831 { shmedia_builtin_p,
11832 CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI, 0 },
11833 { shmedia_builtin_p,
11834 CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI, 0 },
11835 { shmedia_builtin_p,
11836 CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3, 0 },
11837 { shmedia_builtin_p,
11838 CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3, 0 },
11839 { shmedia_builtin_p,
11840 CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3, 0 },
11841 { shmedia_builtin_p,
11842 CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3, 0 },
11843 { shmedia_builtin_p,
11844 CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3, 0 },
11845 { shmedia_builtin_p,
11846 CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF, 0 },
11847 { shmedia_builtin_p,
11848 CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
11849 { shmedia_builtin_p,
11850 CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
11851 { shmedia_builtin_p,
11852 CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
11853 { shmedia_builtin_p,
11854 CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
11855 { shmedia_builtin_p,
11856 CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
11857 { shmedia_builtin_p,
11858 CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
11859 { shmedia_builtin_p,
11860 CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L, 0 },
11861 { shmedia_builtin_p,
11862 CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q, 0 },
11863 { shmedia_builtin_p,
11864 CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L, 0 },
11865 { shmedia_builtin_p,
11866 CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q, 0 },
11867 { shmedia_builtin_p,
11868 CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L, 0 },
11869 { shmedia_builtin_p,
11870 CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q, 0 },
11871 { shmedia_builtin_p,
11872 CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L, 0 },
11873 { shmedia_builtin_p,
11874 CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q, 0 },
11875 { shmedia_builtin_p,
11876 CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64, 0 },
11877 { shmedia_builtin_p,
11878 CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64, 0 },
11879 { shmedia_builtin_p,
11880 CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64, 0 },
11881 { shmedia_builtin_p,
11882 CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64, 0 },
11883 { shmedia_builtin_p,
11884 CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64, 0 },
11885 { shmedia_builtin_p,
11886 CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64, 0 },
11887 { shmedia_builtin_p,
11888 CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64, 0 },
11889 { shmedia_builtin_p,
11890 CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64, 0 },
11891 { shmedia_builtin_p,
11892 CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU, 0 },
11893 { shmedia_builtin_p,
11894 CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2, 0 },
11895 { shmedia_builtin_p,
11896 CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV, 0 },
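/* Create the builtin function decls described in bdesc above, caching the
   shared function types by signature.  */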
11899 static void
11900 sh_init_builtins (void)
11902 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
11903 memset (shared, 0, sizeof shared);
11905 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
11907 builtin_description* d = &bdesc[di];
11909 if (!d->is_enabled ())
11910 continue;
11912 tree type, arg_type = NULL_TREE;
11913 int signature = d->signature;
11915 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
11916 type = shared[signature];
11917 else
11919 int has_result = signature_args[signature][0] != 0;
11920 tree args[3];
11922 if ((signature_args[signature][1] & 8)
11923 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
11924 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
11925 continue;
11926 if (! TARGET_FPU_ANY
11927 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
11928 continue;
11929 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
11930 args[i] = NULL_TREE;
11931 for (int i = 3; ; i--)
11933 int arg = signature_args[signature][i];
11934 int opno = i - 1 + has_result;
11936 if (arg & 8)
11937 arg_type = ptr_type_node;
11938 else if (arg)
11939 arg_type = (*lang_hooks.types.type_for_mode)
11940 (insn_data[d->icode].operand[opno].mode, (arg & 1));
11941 else if (i)
11942 continue;
11943 else
11944 arg_type = void_type_node;
11945 if (i == 0)
11946 break;
11947 args[i-1] = arg_type;
11949 type = build_function_type_list (arg_type, args[0], args[1],
11950 args[2], NULL_TREE);
11951 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
11952 shared[signature] = type;
11954 d->fndecl =
11955 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
11956 NULL, NULL_TREE);
11960 /* Implements target hook vector_mode_supported_p. */
11961 bool
11962 sh_vector_mode_supported_p (enum machine_mode mode)
11964 if (TARGET_FPU_ANY
11965 && ((mode == V2SFmode)
11966 || (mode == V4SFmode)
11967 || (mode == V16SFmode)))
11968 return true;
11970 else if (TARGET_SHMEDIA
11971 && ((mode == V8QImode)
11972 || (mode == V2HImode)
11973 || (mode == V4HImode)
11974 || (mode == V2SImode)))
11975 return true;
11977 return false;
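/* Return true if the current function must use a frame pointer.  */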
11980 bool
11981 sh_frame_pointer_required (void)
11983 /* If needed, override this in other tm.h files to cope with various OS
11984 lossage requiring a frame pointer. */
11985 if (SUBTARGET_FRAME_POINTER_REQUIRED)
11986 return true;
11988 if (crtl->profile)
11989 return true;
11991 return false;
11994 /* Implements target hook dwarf_calling_convention. Return an enum
11995 of dwarf_calling_convention. */
11996 int
11997 sh_dwarf_calling_convention (const_tree func)
11999 if (sh_attr_renesas_p (func))
12000 return DW_CC_GNU_renesas_sh;
12002 return DW_CC_normal;
12005 /* Returns the sh builtin decl for CODE. */
12006 static tree
12007 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
12009 if (code >= ARRAY_SIZE (bdesc))
12010 return error_mark_node;
12012 if (!bdesc[code].is_enabled ())
12013 return error_mark_node;
12015 return bdesc[code].fndecl;
12018 /* Expand an expression EXP that calls a built-in function,
12019 with result going to TARGET if that's convenient
12020 (and in mode MODE if that's convenient).
12021 SUBTARGET may be used as the target for computing one of EXP's operands.
12022 IGNORE is nonzero if the value is to be ignored. */
12023 static rtx
12024 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
12025 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
12027 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12028 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12029 const struct builtin_description *d = &bdesc[fcode];
12030 enum insn_code icode = d->icode;
12031 int signature = d->signature;
12032 int nop = 0;
12033 rtx op[4];
12035 if (signature_args[signature][0])
12037 if (ignore)
12038 return NULL_RTX;
12040 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12041 if (! target || GET_MODE (target) != tmode
12042 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12043 target = gen_reg_rtx (tmode);
12044 op[nop++] = target;
12046 else
12047 target = NULL_RTX;
12049 for (int i = 1; i <= 3; i++, nop++)
12051 tree arg;
12052 enum machine_mode opmode, argmode;
12053 tree optype;
12055 if (! signature_args[signature][i])
12056 break;
12057 arg = CALL_EXPR_ARG (exp, i - 1);
12058 if (arg == error_mark_node)
12059 return const0_rtx;
12060 if (signature_args[signature][i] & 8)
12062 opmode = ptr_mode;
12063 optype = ptr_type_node;
12065 else
12067 opmode = insn_data[icode].operand[nop].mode;
12068 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
12070 argmode = TYPE_MODE (TREE_TYPE (arg));
12071 if (argmode != opmode)
12072 arg = build1 (NOP_EXPR, optype, arg);
12073 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
12074 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
12075 op[nop] = copy_to_mode_reg (opmode, op[nop]);
12078 rtx pat = NULL_RTX;
12080 switch (nop)
12082 case 1:
12083 pat = (*insn_data[d->icode].genfun) (op[0]);
12084 break;
12085 case 2:
12086 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
12087 break;
12088 case 3:
12089 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
12090 break;
12091 case 4:
12092 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
12093 break;
12094 default:
12095 gcc_unreachable ();
12097 if (! pat)
12098 return NULL_RTX;
12099 emit_insn (pat);
12100 return target;
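/* Expand a V2SF unary operation CODE by emitting the corresponding SFmode
   operation once for each of the two vector elements.  */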
12103 void
12104 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
12106 rtx sel0 = const0_rtx;
12107 rtx sel1 = const1_rtx;
12108 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
12109 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
12111 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
12112 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
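/* Likewise for a V2SF binary operation CODE on OP1 and OP2.  */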
12115 void
12116 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
12118 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
12120 emit_insn (gen_binary_sf_op0 (op0, op1, op2, op));
12121 emit_insn (gen_binary_sf_op1 (op0, op1, op2, op));
12124 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
12125 We can allow any mode in any general register. The special registers
12126 only allow SImode. Don't allow any mode in the PR.
12128 We cannot hold DCmode values in the XD registers because alter_reg
12129 handles subregs of them incorrectly. We could work around this by
12130 spacing the XD registers like the DR registers, but this would require
12131 additional memory in every compilation to hold larger register vectors.
12132 We could hold SFmode / SCmode values in XD registers, but that
12133 would require a tertiary reload when reloading from / to memory,
12134 and a secondary reload to reload from / to general regs; that
12135 seems to be a losing proposition.
12137 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
12138 it won't be ferried through GP registers first. */
12139 bool
12140 sh_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
12142 if (SPECIAL_REGISTER_P (regno))
12143 return mode == SImode;
12145 if (regno == FPUL_REG)
12146 return (mode == SImode || mode == SFmode);
12148 if (FP_REGISTER_P (regno) && mode == SFmode)
12149 return true;
12151 if (mode == V2SFmode)
12153 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
12154 || GENERAL_REGISTER_P (regno)))
12155 return true;
12156 else
12157 return false;
12160 if (mode == V4SFmode)
12162 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
12163 || GENERAL_REGISTER_P (regno))
12164 return true;
12165 else
12166 return false;
12169 if (mode == V16SFmode)
12171 if (TARGET_SHMEDIA)
12173 if (FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 16 == 0)
12174 return true;
12175 else
12176 return false;
12178 else
12179 return regno == FIRST_XD_REG;
12182 if (FP_REGISTER_P (regno))
12184 if (mode == SFmode
12185 || mode == SImode
12186 || ((TARGET_SH2E || TARGET_SHMEDIA) && mode == SCmode)
12187 || ((((TARGET_SH4 || TARGET_SH2A_DOUBLE) && mode == DFmode)
12188 || mode == DCmode
12189 || (TARGET_SHMEDIA
12190 && (mode == DFmode || mode == DImode
12191 || mode == V2SFmode || mode == TImode)))
12192 && ((regno - FIRST_FP_REG) & 1) == 0)
12193 || ((TARGET_SH4 || TARGET_SHMEDIA) && mode == TImode
12194 && ((regno - FIRST_FP_REG) & 3) == 0))
12195 return true;
12196 else
12197 return false;
12200 if (XD_REGISTER_P (regno))
12201 return mode == DFmode;
12203 if (TARGET_REGISTER_P (regno))
12204 return (mode == DImode || mode == SImode || mode == PDImode);
12206 if (regno == PR_REG)
12207 return mode == SImode;
12209 if (regno == FPSCR_REG)
12210 return mode == PSImode;
12212 /* FIXME. This works around PR target/37633 for -O0. */
12213 if (!optimize && TARGET_SHMEDIA32 && GET_MODE_SIZE (mode) > 4)
12215 unsigned int n = GET_MODE_SIZE (mode) / 8;
12217 if (regno >= FIRST_GENERAL_REG + 10 - n + 1
12218 && regno <= FIRST_GENERAL_REG + 14)
12219 return false;
12222 return true;
12225 /* Return true if a mode change from FROM to TO is invalid for a register
12226 in class RCLASS. */
12227 bool
12228 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
12229 enum reg_class rclass)
12231 /* We want to enable the use of SUBREGs as a means to
12232 VEC_SELECT a single element of a vector. */
12234 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
12235 This can be problematic when SFmode vector subregs need to be accessed
12236 on the stack with displacement addressing, as it happens with -O0.
12237 Thus we disallow the mode change for -O0. */
12238 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
12239 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
12241 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
12243 if (TARGET_LITTLE_ENDIAN)
12245 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
12246 return reg_classes_intersect_p (DF_REGS, rclass);
12248 else
12250 if (GET_MODE_SIZE (from) < 8)
12251 return reg_classes_intersect_p (DF_REGS, rclass);
12254 return false;
12257 /* Return true if registers in machine mode MODE will likely be
12258 allocated to registers in small register classes. */
12259 bool
12260 sh_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
12262 return (! TARGET_SHMEDIA);
12265 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
12266 that label is used. */
12267 void
12268 sh_mark_label (rtx address, int nuses)
12270 if (GOTOFF_P (address))
12272 /* Extract the label or symbol. */
12273 address = XEXP (address, 0);
12274 if (GET_CODE (address) == PLUS)
12275 address = XEXP (address, 0);
12276 address = XVECEXP (address, 0, 0);
12278 if (GET_CODE (address) == LABEL_REF
12279 && LABEL_P (XEXP (address, 0)))
12280 LABEL_NUSES (XEXP (address, 0)) += nuses;
12283 /* Compute extra cost of moving data between one register class
12284 and another.
12286 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
12287 uses this information. Hence, the general register <-> floating point
12288 register information here is not used for SFmode. */
12289 static int
12290 sh_register_move_cost (enum machine_mode mode,
12291 reg_class_t srcclass, reg_class_t dstclass)
12293 if (dstclass == T_REGS || dstclass == PR_REGS)
12294 return 10;
12296 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
12297 return 4;
12299 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
12300 && REGCLASS_HAS_FP_REG (srcclass)
12301 && REGCLASS_HAS_FP_REG (dstclass))
12302 return 4;
12304 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
12305 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
12307 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
12308 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
12309 return 9;
12311 if ((REGCLASS_HAS_FP_REG (dstclass)
12312 && REGCLASS_HAS_GENERAL_REG (srcclass))
12313 || (REGCLASS_HAS_GENERAL_REG (dstclass)
12314 && REGCLASS_HAS_FP_REG (srcclass)))
12316 /* Discourage trying to use fp regs for a pointer. This also
12317 discourages fp regs with SImode because Pmode is an alias
12318 of SImode on this target. See PR target/48596. */
12319 int addend = (mode == Pmode) ? 40 : 0;
12321 return (((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12) + addend)
12322 * ((GET_MODE_SIZE (mode) + 7) / 8U));
12325 if ((dstclass == FPUL_REGS
12326 && REGCLASS_HAS_GENERAL_REG (srcclass))
12327 || (srcclass == FPUL_REGS
12328 && REGCLASS_HAS_GENERAL_REG (dstclass)))
12329 return 5;
12331 if ((dstclass == FPUL_REGS
12332 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
12333 || (srcclass == FPUL_REGS
12334 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
12335 return 7;
12337 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12338 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12339 return 20;
12341 /* ??? ptabs faults on (value & 0x3) == 0x3 */
12342 if (TARGET_SHMEDIA
12343 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
12345 if (sh_gettrcost >= 0)
12346 return sh_gettrcost;
12347 else if (!TARGET_PT_FIXED)
12348 return 100;
12351 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
12352 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
12353 return 4;
12355 if (TARGET_SHMEDIA
12356 || (TARGET_FMOVD
12357 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
12358 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
12359 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
12361 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
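/* Load a pointer-sized value from ADDR into REG, sign-extending it to
   Pmode when Pmode is wider than ptr_mode.  */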
12364 static rtx
12365 emit_load_ptr (rtx reg, rtx addr)
12367 rtx mem = gen_const_mem (ptr_mode, addr);
12369 if (Pmode != ptr_mode)
12370 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
12371 return emit_move_insn (reg, mem);
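/* Output the thunk for a virtual function: adjust the incoming "this"
   pointer by DELTA (and by the value loaded at VCALL_OFFSET, if nonzero)
   and tail-call FUNCTION.  */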
12374 static void
12375 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12376 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12377 tree function)
12379 CUMULATIVE_ARGS cum;
12380 int structure_value_byref = 0;
12381 rtx this_rtx, this_value, sibcall, funexp;
12382 rtx_insn *insns;
12383 tree funtype = TREE_TYPE (function);
12384 int simple_add = CONST_OK_FOR_ADD (delta);
12385 int did_load = 0;
12386 rtx scratch0, scratch1, scratch2;
12387 unsigned i;
12389 reload_completed = 1;
12390 epilogue_completed = 1;
12391 crtl->uses_only_leaf_regs = 1;
12393 emit_note (NOTE_INSN_PROLOGUE_END);
12395 /* Find the "this" pointer. We have such a wide range of ABIs for the
12396 SH that it's best to do this completely machine independently.
12397 "this" is passed as first argument, unless a structure return pointer
12398 comes first, in which case "this" comes second. */
12399 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
12400 #ifndef PCC_STATIC_STRUCT_RETURN
12401 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12402 structure_value_byref = 1;
12403 #endif /* not PCC_STATIC_STRUCT_RETURN */
12404 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
12406 tree ptype = build_pointer_type (TREE_TYPE (funtype));
12408 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
12410 this_rtx
12411 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
12413 /* For SHcompact, we only have r0 for a scratch register: r1 is the
12414 static chain pointer (even if you can't have nested virtual functions
12415 right now, someone might implement them sometime), and the rest of the
12416 registers are used for argument passing, are callee-saved, or reserved. */
12417 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
12418 -ffixed-reg has been used. */
12419 if (! call_used_regs[0] || fixed_regs[0])
12420 error ("r0 needs to be available as a call-clobbered register");
12421 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
12422 if (! TARGET_SH5)
12424 if (call_used_regs[1] && ! fixed_regs[1])
12425 scratch1 = gen_rtx_REG (ptr_mode, 1);
12426 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
12427 to where struct values are to be returned. */
12428 if (call_used_regs[3] && ! fixed_regs[3])
12429 scratch2 = gen_rtx_REG (Pmode, 3);
12431 else if (TARGET_SHMEDIA)
12433 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
12434 if (i != REGNO (scratch0) &&
12435 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
12437 scratch1 = gen_rtx_REG (ptr_mode, i);
12438 break;
12440 if (scratch1 == scratch0)
12441 error ("need a second call-clobbered general purpose register");
12442 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
12443 if (call_used_regs[i] && ! fixed_regs[i])
12445 scratch2 = gen_rtx_REG (Pmode, i);
12446 break;
12448 if (scratch2 == scratch0)
12449 error ("need a call-clobbered target register");
12452 this_value = plus_constant (Pmode, this_rtx, delta);
12453 if (vcall_offset
12454 && (simple_add || scratch0 != scratch1)
12455 && strict_memory_address_p (ptr_mode, this_value))
12457 emit_load_ptr (scratch0, this_value);
12458 did_load = 1;
12461 if (!delta)
12462 ; /* Do nothing. */
12463 else if (simple_add)
12464 emit_move_insn (this_rtx, this_value);
12465 else
12467 emit_move_insn (scratch1, GEN_INT (delta));
12468 emit_insn (gen_add2_insn (this_rtx, scratch1));
12471 if (vcall_offset)
12473 rtx offset_addr;
12475 if (!did_load)
12476 emit_load_ptr (scratch0, this_rtx);
12478 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
12479 if (strict_memory_address_p (ptr_mode, offset_addr))
12480 ; /* Do nothing. */
12481 else if (! TARGET_SH5 && scratch0 != scratch1)
12483 /* scratch0 != scratch1, and we have indexed loads. Get better
12484 schedule by loading the offset into r1 and using an indexed
12485 load - then the load of r1 can issue before the load from
12486 (this_rtx + delta) finishes. */
12487 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12488 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
12490 else if (CONST_OK_FOR_ADD (vcall_offset))
12492 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
12493 offset_addr = scratch0;
12495 else if (scratch0 != scratch1)
12497 emit_move_insn (scratch1, GEN_INT (vcall_offset));
12498 emit_insn (gen_add2_insn (scratch0, scratch1));
12499 offset_addr = scratch0;
12501 else
12502 gcc_unreachable (); /* FIXME */
12503 emit_load_ptr (scratch0, offset_addr);
12505 if (Pmode != ptr_mode)
12506 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
12507 emit_insn (gen_add2_insn (this_rtx, scratch0));
12510 /* Generate a tail call to the target function. */
12511 if (! TREE_USED (function))
12513 assemble_external (function);
12514 TREE_USED (function) = 1;
12516 funexp = XEXP (DECL_RTL (function), 0);
12517 /* If the function is overridden, so is the thunk, hence we don't
12518 need GOT addressing even if this is a public symbol. */
12519 #if 0
12520 if (TARGET_SH1 && ! flag_weak)
12521 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
12522 else
12523 #endif
12524 if (TARGET_SH2 && flag_pic)
12526 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
12527 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
12529 else
12531 if (TARGET_SHMEDIA && flag_pic)
12533 funexp = gen_sym2PIC (funexp);
12534 PUT_MODE (funexp, Pmode);
12536 emit_move_insn (scratch2, funexp);
12537 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
12538 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
12540 sibcall = emit_call_insn (sibcall);
12541 SIBLING_CALL_P (sibcall) = 1;
12542 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
12543 emit_barrier ();
12545 /* Run just enough of rest_of_compilation to do scheduling and get
12546 the insns emitted. Note that use_thunk calls
12547 assemble_start_function and assemble_end_function. */
12549 insns = get_insns ();
12551 if (optimize > 0)
12553 if (! cfun->cfg)
12554 init_flow (cfun);
12555 split_all_insns_noflow ();
12558 sh_reorg ();
12559 shorten_branches (insns);
12560 final_start_function (insns, file, 1);
12561 final (insns, file, 1);
12562 final_end_function ();
12564 reload_completed = 0;
12565 epilogue_completed = 0;
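/* Return an rtx for the function symbol NAME of kind KIND; for SFUNC_GOT
   and SFUNC_STATIC under PIC, the symbol is loaded through the GOT /
   GOTOFF into TARGET (or a fresh register) first.  */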
12568 rtx
12569 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
12571 rtx sym;
12573 /* If this is not an ordinary function, the name usually comes from a
12574 string literal or an sprintf buffer. Make sure we use the same
12575 string consistently, so that cse will be able to unify address loads. */
12576 if (kind != FUNCTION_ORDINARY)
12577 name = IDENTIFIER_POINTER (get_identifier (name));
12578 sym = gen_rtx_SYMBOL_REF (Pmode, name);
12579 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
12580 if (flag_pic)
12581 switch (kind)
12583 case FUNCTION_ORDINARY:
12584 break;
12585 case SFUNC_GOT:
12587 rtx reg = target ? target : gen_reg_rtx (Pmode);
12589 emit_insn (gen_symGOT2reg (reg, sym));
12590 sym = reg;
12591 break;
12593 case SFUNC_STATIC:
12595 /* ??? To allow cse to work, we use GOTOFF relocations.
12596 We could add combiner patterns to transform this into
12597 straight pc-relative calls with sym2PIC / bsrf when
12598 label load and function call are still 1:1 and in the
12599 same basic block during combine. */
12600 rtx reg = target ? target : gen_reg_rtx (Pmode);
12602 emit_insn (gen_symGOTOFF2reg (reg, sym));
12603 sym = reg;
12604 break;
12607 if (target && sym != target)
12609 emit_move_insn (target, sym);
12610 return target;
12612 return sym;
12615 /* Find the number of a general purpose register in S. */
12616 static int
12617 scavenge_reg (HARD_REG_SET *s)
12619 int r;
12620 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
12621 if (TEST_HARD_REG_BIT (*s, r))
12622 return r;
12623 return -1;
12626 rtx
12627 sh_get_pr_initial_val (void)
12629 rtx val;
12631 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
12632 PR register on SHcompact, because it might be clobbered by the prologue.
12633 We check first if that is known to be the case. */
12634 if (TARGET_SHCOMPACT
12635 && ((crtl->args.info.call_cookie
12636 & ~ CALL_COOKIE_RET_TRAMP (1))
12637 || crtl->saves_all_registers))
12638 return gen_frame_mem (SImode, return_address_pointer_rtx);
12640 /* If we haven't finished rtl generation, there might be a nonlocal label
12641 that we haven't seen yet.
12642 ??? get_hard_reg_initial_val fails if it is called after register
12643 allocation has started, unless it has been called before for the
12644 same register. And even then, we end up in trouble if we didn't use
12645 the register in the same basic block before. So call
12646 get_hard_reg_initial_val now and wrap it in an unspec if we might
12647 need to replace it. */
12648 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
12649 combine can put the pseudo returned by get_hard_reg_initial_val into
12650 instructions that need a general purpose registers, which will fail to
12651 be recognized when the pseudo becomes allocated to PR. */
12652 val
12653 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
12654 if (TARGET_SH1)
12655 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
12656 return val;
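/* Try to expand an scc insn that compares the T bit (operands[2]) against
   a constant (operands[3]) with code operands[1], storing the result in
   operands[0].  Return true if the expansion succeeded.  */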
12659 bool
12660 sh_expand_t_scc (rtx operands[])
12662 enum rtx_code code = GET_CODE (operands[1]);
12663 rtx target = operands[0];
12664 rtx op0 = operands[2];
12665 rtx op1 = operands[3];
12666 rtx result = target;
12667 HOST_WIDE_INT val;
12669 if (!REG_P (op0) || REGNO (op0) != T_REG
12670 || !CONST_INT_P (op1))
12671 return false;
12672 if (!REG_P (result))
12673 result = gen_reg_rtx (SImode);
12674 val = INTVAL (op1);
12675 if ((code == EQ && val == 1) || (code == NE && val == 0))
12676 emit_insn (gen_movt (result, get_t_reg_rtx ()));
12677 else if ((code == EQ && val == 0) || (code == NE && val == 1))
12678 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
12679 else if (code == EQ || code == NE)
12680 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
12681 else
12682 return false;
12683 if (result != target)
12684 emit_move_insn (target, result);
12685 return true;
12688 /* INSN is an sfunc; return the rtx that describes the address used. */
12689 static rtx
12690 extract_sfunc_addr (rtx insn)
12692 rtx pattern, part = NULL_RTX;
12693 int len, i;
12695 pattern = PATTERN (insn);
12696 len = XVECLEN (pattern, 0);
12697 for (i = 0; i < len; i++)
12699 part = XVECEXP (pattern, 0, i);
12700 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
12701 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
12702 return XEXP (part, 0);
12704 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
12705 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
12708 /* Verify that the register in use_sfunc_addr still agrees with the address
12709 used in the sfunc. This prevents fill_slots_from_thread from changing
12710 use_sfunc_addr.
12711 INSN is the use_sfunc_addr instruction, and REG is the register it
12712 guards. */
12713 bool
12714 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
12716 /* Search for the sfunc. It should really come right after INSN. */
12717 while ((insn = NEXT_INSN (insn)))
12719 if (LABEL_P (insn) || JUMP_P (insn))
12720 break;
12721 if (! INSN_P (insn))
12722 continue;
12724 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
12725 insn = seq->insn (0);
12726 if (GET_CODE (PATTERN (insn)) != PARALLEL
12727 || get_attr_type (insn) != TYPE_SFUNC)
12728 continue;
12729 return rtx_equal_p (extract_sfunc_addr (insn), reg);
12731 gcc_unreachable ();
12734 /* This function returns a constant rtx that represents 2**15 / pi in
12735 SFmode. It's used to scale an SFmode angle, in radians, to a fixed-point
12736 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
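/* For example, an angle of pi/2 radians multiplied by this constant gives
   2**14 = 0x4000, i.e. one quarter of the 0x10000 full circle.  */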
12737 static GTY(()) rtx sh_fsca_sf2int_rtx;
12739 rtx
12740 sh_fsca_sf2int (void)
12742 if (! sh_fsca_sf2int_rtx)
12744 REAL_VALUE_TYPE rv;
12746 real_from_string (&rv, "10430.378350470453");
12747 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
12750 return sh_fsca_sf2int_rtx;
12753 /* This function returns a constant rtx that represents pi / 2**15 in
12754 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
12755 of a full circle back to an SFmode angle in radians, i.e. 0x10000
12756 maps to 2*pi. */
12757 static GTY(()) rtx sh_fsca_int2sf_rtx;
12759 rtx
12760 sh_fsca_int2sf (void)
12762 if (! sh_fsca_int2sf_rtx)
12764 REAL_VALUE_TYPE rv;
12766 real_from_string (&rv, "9.587379924285257e-5");
12767 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
12770 return sh_fsca_int2sf_rtx;
12773 /* Initialize the CUMULATIVE_ARGS structure. */
12774 void
12775 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
12776 tree fntype,
12777 rtx libname ATTRIBUTE_UNUSED,
12778 tree fndecl,
12779 signed int n_named_args,
12780 enum machine_mode mode)
12782 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
12783 pcum->free_single_fp_reg = 0;
12784 pcum->stack_regs = 0;
12785 pcum->byref_regs = 0;
12786 pcum->byref = 0;
12787 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
12789 /* XXX - Should we check TARGET_HITACHI here ??? */
12790 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
12792 if (fntype)
12794 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
12795 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
12796 pcum->prototype_p = prototype_p (fntype);
12797 pcum->arg_count [(int) SH_ARG_INT]
12798 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
12800 pcum->call_cookie
12801 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12802 && pcum->arg_count [(int) SH_ARG_INT] == 0
12803 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
12804 ? int_size_in_bytes (TREE_TYPE (fntype))
12805 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
12806 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
12807 == FIRST_RET_REG));
12809 else
12811 pcum->arg_count [(int) SH_ARG_INT] = 0;
12812 pcum->prototype_p = FALSE;
12813 if (mode != VOIDmode)
12815 pcum->call_cookie =
12816 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
12817 && GET_MODE_SIZE (mode) > 4
12818 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
12820 /* If the default ABI is the Renesas ABI then all library
12821 calls must assume that the library will be using the
12822 Renesas ABI. So if the function would return its result
12823 in memory then we must force the address of this memory
12824 block onto the stack. Ideally we would like to call
12825 targetm.calls.return_in_memory() here but we do not have
12826 the TYPE or the FNDECL available so we synthesize the
12827 contents of that function as best we can. */
12828 pcum->force_mem =
12829 (TARGET_DEFAULT & MASK_HITACHI)
12830 && (mode == BLKmode
12831 || (GET_MODE_SIZE (mode) > 4
12832 && !(mode == DFmode
12833 && TARGET_FPU_DOUBLE)));
12835 else
12837 pcum->call_cookie = 0;
12838 pcum->force_mem = FALSE;
12843 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
12844 not enter into CONST_DOUBLE for the replace.
12846 Note that copying is not done so X must not be shared unless all copies
12847 are to be modified.
12849 This is like replace_rtx, except that we operate on N_REPLACEMENTS
12850 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
12851 replacements[n*2+1] - and that we take mode changes into account.
12853 If a replacement is ambiguous, return NULL_RTX.
12855 If MODIFY is zero, don't modify any rtl in place,
12856 just return zero or nonzero for failure / success. */
12857 rtx
12858 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
12860 int i, j;
12861 const char *fmt;
12863 /* The following prevents loops from occurring when we change a MEM in
12864 a CONST_DOUBLE to the same CONST_DOUBLE. */
12865 if (x != NULL_RTX && GET_CODE (x) == CONST_DOUBLE)
12866 return x;
12868 for (i = n_replacements - 1; i >= 0 ; i--)
12869 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
12870 return replacements[i*2+1];
12872 /* Allow this function to make replacements in EXPR_LISTs. */
12873 if (x == NULL_RTX)
12874 return NULL_RTX;
12876 if (GET_CODE (x) == SUBREG)
12878 rtx new_rtx = replace_n_hard_rtx (SUBREG_REG (x), replacements,
12879 n_replacements, modify);
12881 if (CONST_INT_P (new_rtx))
12883 x = simplify_subreg (GET_MODE (x), new_rtx,
12884 GET_MODE (SUBREG_REG (x)),
12885 SUBREG_BYTE (x));
12886 if (! x)
12887 abort ();
12889 else if (modify)
12890 SUBREG_REG (x) = new_rtx;
12892 return x;
12894 else if (REG_P (x))
12896 unsigned regno = REGNO (x);
12897 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
12898 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
12899 rtx result = NULL_RTX;
12901 for (i = n_replacements - 1; i >= 0; i--)
12903 rtx from = replacements[i*2];
12904 rtx to = replacements[i*2+1];
12905 unsigned from_regno, from_nregs, to_regno, new_regno;
12907 if (!REG_P (from))
12908 continue;
12909 from_regno = REGNO (from);
12910 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
12911 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
12912 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
12914 if (regno < from_regno
12915 || regno + nregs > from_regno + nregs
12916 || !REG_P (to)
12917 || result)
12918 return NULL_RTX;
12919 to_regno = REGNO (to);
12920 if (to_regno < FIRST_PSEUDO_REGISTER)
12922 new_regno = regno + to_regno - from_regno;
12923 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
12924 != nregs)
12925 return NULL_RTX;
12926 result = gen_rtx_REG (GET_MODE (x), new_regno);
12928 else if (GET_MODE (x) <= GET_MODE (to))
12929 result = gen_lowpart_common (GET_MODE (x), to);
12930 else
12931 result = gen_lowpart_SUBREG (GET_MODE (x), to);
12934 return result ? result : x;
12936 else if (GET_CODE (x) == ZERO_EXTEND)
12938 rtx new_rtx = replace_n_hard_rtx (XEXP (x, 0), replacements,
12939 n_replacements, modify);
12941 if (CONST_INT_P (new_rtx))
12943 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
12944 new_rtx, GET_MODE (XEXP (x, 0)));
12945 if (! x)
12946 abort ();
12948 else if (modify)
12949 XEXP (x, 0) = new_rtx;
12951 return x;
12954 fmt = GET_RTX_FORMAT (GET_CODE (x));
12955 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12957 rtx new_rtx;
12959 if (fmt[i] == 'e')
12961 new_rtx = replace_n_hard_rtx (XEXP (x, i), replacements,
12962 n_replacements, modify);
12963 if (!new_rtx)
12964 return NULL_RTX;
12965 if (modify)
12966 XEXP (x, i) = new_rtx;
12968 else if (fmt[i] == 'E')
12969 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12971 new_rtx = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
12972 n_replacements, modify);
12973 if (!new_rtx)
12974 return NULL_RTX;
12975 if (modify)
12976 XVECEXP (x, i, j) = new_rtx;
12980 return x;
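/* Return an rtx that converts X to MODE, looking through an existing
   ZERO_EXTEND / SIGN_EXTEND of X where that allows a simpler truncation or
   re-extension; NEED_SIGN_EXT is nonzero if sign extension must be
   preserved.  */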
12983 rtx
12984 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
12986 enum rtx_code code = TRUNCATE;
12988 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
12990 rtx inner = XEXP (x, 0);
12991 enum machine_mode inner_mode = GET_MODE (inner);
12993 if (inner_mode == mode)
12994 return inner;
12995 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
12996 x = inner;
12997 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
12998 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
13000 code = GET_CODE (x);
13001 x = inner;
13004 return gen_rtx_fmt_e (code, mode, x);
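/* Example (editor's sketch; the register number is hypothetical):

     rtx x = gen_rtx_ZERO_EXTEND (DImode, gen_rtx_REG (SImode, 2));
     rtx t = sh_gen_truncate (SImode, x, 0);

   Here T is simply (reg:SI 2), because the inner mode of the extension
   already matches the requested mode; otherwise a (truncate ...) rtx of
   MODE is built around the (possibly unwrapped) operand.  */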
13007 /* Called via for_each_rtx after reload, to clean up truncates of
13008 registers that span multiple actual hard registers. */
13010 shmedia_cleanup_truncate (rtx *p, void *n_changes)
13012 rtx x = *p, reg;
13014 if (GET_CODE (x) != TRUNCATE)
13015 return 0;
13016 reg = XEXP (x, 0);
13017 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && REG_P (reg))
13019 enum machine_mode reg_mode = GET_MODE (reg);
13020 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
13021 subreg_lowpart_offset (DImode, reg_mode));
13022 *(int*) n_changes += 1;
13023 return -1;
13025 return 0;
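/* Usage sketch (editor's illustration): as noted above, this is meant
   to be used as a for_each_rtx callback after reload, e.g.

     int n_changes = 0;
     for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);

   Each TRUNCATE of a value wider than 8 bytes has its operand replaced
   by the DImode lowpart subreg, and N_CHANGES counts the rewrites so
   the caller can tell whether anything was modified.  */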
13028 /* Load and store depend on the highpart of the address. However,
13029 set_attr_alternative does not give well-defined results before reload,
13030 so we must look at the rtl ourselves to see if any of the feeding
13031 registers is used in a memref.
13033 Called by sh_contains_memref_p via for_each_rtx. */
13034 static int
13035 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
13037 return (MEM_P (*loc));
13040 /* Return true iff INSN contains a MEM. */
13041 bool
13042 sh_contains_memref_p (rtx insn)
13044 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
13047 /* Return true iff INSN loads a banked register. */
13048 bool
13049 sh_loads_bankedreg_p (rtx insn)
13051 if (GET_CODE (PATTERN (insn)) == SET)
13053 rtx op = SET_DEST (PATTERN(insn));
13054 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
13055 return true;
13058 return false;
13061 /* FNADDR is the MEM expression from a call expander. Return an address
13062 to use in an SHmedia insn pattern. */
13064 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
13066 int is_sym;
13068 fnaddr = XEXP (fnaddr, 0);
13069 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
13070 if (flag_pic && is_sym)
13072 if (! SYMBOL_REF_LOCAL_P (fnaddr))
13074 rtx reg = gen_reg_rtx (Pmode);
13076 /* We must not use GOTPLT for sibcalls, because PIC_REG
13077 must be restored before the PLT code gets to run. */
13078 if (is_sibcall)
13079 emit_insn (gen_symGOT2reg (reg, fnaddr));
13080 else
13081 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
13082 fnaddr = reg;
13084 else
13086 fnaddr = gen_sym2PIC (fnaddr);
13087 PUT_MODE (fnaddr, Pmode);
13090 /* If ptabs might trap, make this visible to the rest of the compiler.
13091 We generally assume that symbols pertain to valid locations, but
13092 it is possible to generate invalid symbols with asm or linker tricks.
13093 In a list of functions where each returns its successor, an invalid
13094 symbol might denote an empty list. */
13095 if (!TARGET_PT_FIXED
13096 && (!is_sym || TARGET_INVALID_SYMBOLS)
13097 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
13099 rtx tr = gen_reg_rtx (PDImode);
13101 emit_insn (gen_ptabs (tr, fnaddr));
13102 fnaddr = tr;
13104 else if (! target_reg_operand (fnaddr, Pmode))
13105 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
13106 return fnaddr;
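/* Usage sketch (editor's illustration; the exact caller lives in the
   machine description): a SHmedia call expander would do something like

     rtx addr = shmedia_prepare_call_address (operands[0], 0);

   where operands[0] is the MEM operand of the call pattern, and would
   pass 1 for IS_SIBCALL in the sibcall expanders so that GOT rather
   than GOTPLT references are generated, for the reason given in the
   comment above.  */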
13109 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
13110 static reg_class_t
13111 sh_preferred_reload_class (rtx x, reg_class_t rclass)
13113 if (rclass == NO_REGS
13114 && TARGET_SHMEDIA
13115 && (CONST_DOUBLE_P (x)
13116 || GET_CODE (x) == SYMBOL_REF
13117 || PIC_ADDR_P (x)))
13118 return GENERAL_REGS;
13120 return rclass;
13123 /* Implement TARGET_SECONDARY_RELOAD. */
13124 static reg_class_t
13125 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13126 enum machine_mode mode, secondary_reload_info *sri)
13128 enum reg_class rclass = (enum reg_class) rclass_i;
13130 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
13131 && REG_P (XEXP (XEXP (x, 0), 0))
13132 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
13133 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13135 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
13136 return rclass == R0_REGS ? NO_REGS : R0_REGS;
13138 if (REG_P (x) && REGNO (x) == GBR_REG)
13139 return NO_REGS;
13141 if (in_p)
13143 if (REGCLASS_HAS_FP_REG (rclass)
13144 && ! TARGET_SHMEDIA
13145 && immediate_operand ((x), mode)
13146 && ! ((fp_zero_operand (x) || fp_one_operand (x))
13147 && mode == SFmode && fldi_ok ()))
13148 switch (mode)
13150 case SFmode:
13151 sri->icode = CODE_FOR_reload_insf__frn;
13152 return NO_REGS;
13153 case DFmode:
13154 sri->icode = CODE_FOR_reload_indf__frn;
13155 return NO_REGS;
13156 case SImode:
13157 /* ??? If we knew that we are in the appropriate mode -
13158 single precision - we could use a reload pattern directly. */
13159 return FPUL_REGS;
13160 default:
13161 abort ();
13163 if (rclass == FPUL_REGS
13164 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
13165 || REGNO (x) == T_REG))
13166 || GET_CODE (x) == PLUS))
13167 return GENERAL_REGS;
13168 if (rclass == FPUL_REGS && immediate_operand (x, mode))
13170 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
13171 return GENERAL_REGS;
13172 else if (mode == SFmode)
13173 return FP_REGS;
13174 sri->icode = CODE_FOR_reload_insi__i_fpul;
13175 return NO_REGS;
13177 if (rclass == FPSCR_REGS
13178 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
13179 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
13180 return GENERAL_REGS;
13181 if (REGCLASS_HAS_FP_REG (rclass)
13182 && TARGET_SHMEDIA
13183 && immediate_operand (x, mode)
13184 && x != CONST0_RTX (GET_MODE (x))
13185 && GET_MODE (x) != V4SFmode)
13186 return GENERAL_REGS;
13187 if ((mode == QImode || mode == HImode)
13188 && TARGET_SHMEDIA && inqhi_operand (x, mode))
13190 sri->icode = ((mode == QImode)
13191 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
13192 return NO_REGS;
13194 if (TARGET_SHMEDIA && rclass == GENERAL_REGS
13195 && (GET_CODE (x) == LABEL_REF || PIC_ADDR_P (x)))
13196 return TARGET_REGS;
13197 } /* end of input-only processing. */
13199 if (((REGCLASS_HAS_FP_REG (rclass)
13200 && (REG_P (x)
13201 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
13202 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
13203 && TARGET_FMOVD))))
13204 || (REGCLASS_HAS_GENERAL_REG (rclass)
13205 && REG_P (x)
13206 && FP_REGISTER_P (REGNO (x))))
13207 && ! TARGET_SHMEDIA
13208 && (mode == SFmode || mode == SImode))
13209 return FPUL_REGS;
13210 if ((rclass == FPUL_REGS
13211 || (REGCLASS_HAS_FP_REG (rclass)
13212 && ! TARGET_SHMEDIA && mode == SImode))
13213 && (MEM_P (x)
13214 || (REG_P (x)
13215 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
13216 || REGNO (x) == T_REG
13217 || system_reg_operand (x, VOIDmode)))))
13219 if (rclass == FPUL_REGS)
13220 return GENERAL_REGS;
13221 return FPUL_REGS;
13223 if ((rclass == TARGET_REGS
13224 || (TARGET_SHMEDIA && rclass == SIBCALL_REGS))
13225 && !satisfies_constraint_Csy (x)
13226 && (!REG_P (x) || ! GENERAL_REGISTER_P (REGNO (x))))
13227 return GENERAL_REGS;
13228 if ((rclass == MAC_REGS || rclass == PR_REGS)
13229 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
13230 && rclass != REGNO_REG_CLASS (REGNO (x)))
13231 return GENERAL_REGS;
13232 if (rclass != GENERAL_REGS && REG_P (x)
13233 && TARGET_REGISTER_P (REGNO (x)))
13234 return GENERAL_REGS;
13236 /* If we get here, fall back to loading the FPUL register through general
13237 registers. This case can happen when the movsi_ie insn is initially
13238 picked to load/store the FPUL register from/to another register, and
13239 that other register is then allocated on the stack. */
13240 if (rclass == FPUL_REGS && true_regnum (x) == -1)
13241 return GENERAL_REGS;
13243 /* Force mov.b / mov.w displacement addressing insn to use R0 as
13244 the other operand.
13245 On SH2A we could also just leave it alone here, which would result in a
13246 4 byte move insn being generated instead. However, for this to work
13247 the insns must have the appropriate alternatives. */
13248 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13249 && satisfies_constraint_Sdd (x)
13250 && sh_disp_addr_displacement (x)
13251 <= sh_max_mov_insn_displacement (mode, false))
13252 return R0_REGS;
13254 /* When reload is trying to address a QImode or HImode subreg on the stack,
13255 force any subreg byte into R0_REGS, as this is going to become a
13256 displacement address.
13257 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
13258 is on the stack, the memref to it might already require a displacement
13259 and that has to be added to the final address. At this point we don't
13260 know the cumulative displacement so we assume the worst case. */
13261 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
13262 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
13263 return R0_REGS;
13265 return NO_REGS;
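/* Example (editor's illustration): reloading
   (mem:SI (plus:SI (reg:SI GBR_REG) (const_int 4))) into or out of any
   class other than R0_REGS makes the checks at the top of this function
   return R0_REGS as the intermediate class, since the GBR-displacement
   addressing modes on SH (mov.l @(disp,GBR),R0 / mov.l R0,@(disp,GBR))
   can only transfer data through R0.  */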
13268 static void
13269 sh_conditional_register_usage (void)
13271 int regno;
13272 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
13273 if (! VALID_REGISTER_P (regno))
13274 fixed_regs[regno] = call_used_regs[regno] = 1;
13275 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
13276 if (TARGET_SH5)
13278 call_used_regs[FIRST_GENERAL_REG + 8]
13279 = call_used_regs[FIRST_GENERAL_REG + 9] = 1;
13280 call_really_used_regs[FIRST_GENERAL_REG + 8]
13281 = call_really_used_regs[FIRST_GENERAL_REG + 9] = 1;
13283 if (TARGET_SHMEDIA)
13285 regno_reg_class[FIRST_GENERAL_REG] = GENERAL_REGS;
13286 CLEAR_HARD_REG_SET (reg_class_contents[FP0_REGS]);
13287 regno_reg_class[FIRST_FP_REG] = FP_REGS;
13289 if (flag_pic)
13291 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13292 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13294 /* Renesas saves and restores mac registers on call. */
13295 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
13297 call_really_used_regs[MACH_REG] = 0;
13298 call_really_used_regs[MACL_REG] = 0;
13301 if (TARGET_SHMEDIA)
13303 for (regno = FIRST_TARGET_REG; regno <= LAST_TARGET_REG; regno ++)
13304 if (! fixed_regs[regno] && call_really_used_regs[regno])
13305 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13307 else
13308 for (regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
13309 if (! fixed_regs[regno] && call_really_used_regs[regno])
13310 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
13313 /* Implement TARGET_LEGITIMATE_CONSTANT_P
13315 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
13316 static bool
13317 sh_legitimate_constant_p (enum machine_mode mode, rtx x)
13319 return (TARGET_SHMEDIA
13320 ? ((mode != DFmode && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
13321 || x == CONST0_RTX (mode)
13322 || !TARGET_SHMEDIA_FPU
13323 || TARGET_SHMEDIA64)
13324 : (GET_CODE (x) != CONST_DOUBLE
13325 || mode == DFmode || mode == SFmode
13326 || mode == DImode || GET_MODE (x) == VOIDmode));
13329 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
13331 static void
13332 sh_init_sync_libfuncs (void)
13334 init_sync_libfuncs (UNITS_PER_WORD);
13337 /* Return true if it is appropriate to emit `ret' instructions in the
13338 body of a function. */
13339 bool
13340 sh_can_use_simple_return_p (void)
13342 HARD_REG_SET live_regs_mask;
13343 int d;
13345 /* Some targets require special return insns. */
13346 if (TARGET_SHMEDIA
13347 || (TARGET_SHCOMPACT
13348 && (crtl->args.info.call_cookie & CALL_COOKIE_RET_TRAMP (1))))
13349 return false;
13351 if (! reload_completed || frame_pointer_needed)
13352 return false;
13354 /* Moving the prologue around doesn't reduce the size. */
13355 if (optimize_function_for_size_p (cfun))
13356 return false;
13358 /* Finally, allow for pr save. */
13359 d = calc_live_regs (&live_regs_mask);
13361 if (rounded_frame_size (d) > 4)
13362 return false;
13364 return true;
13367 /*------------------------------------------------------------------------------
13368 Address mode optimization support code
13371 typedef HOST_WIDE_INT disp_t;
13372 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
13373 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
13374 static const disp_t INVALID_DISP = MAX_DISP;
13376 /* A memory reference which is described by a base register and a
13377 displacement. */
13378 class base_reg_disp
13380 public:
13381 base_reg_disp (rtx br, disp_t d);
13383 bool is_reg (void) const;
13384 bool is_disp (void) const;
13385 rtx reg (void) const;
13386 disp_t disp (void) const;
13388 private:
13389 rtx reg_;
13390 disp_t disp_;
13393 inline
13394 base_reg_disp::base_reg_disp (rtx br, disp_t d)
13395 : reg_ (br), disp_ (d)
13399 inline bool
13400 base_reg_disp::is_reg (void) const
13402 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
13405 inline bool
13406 base_reg_disp::is_disp (void) const
13408 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
13411 inline rtx
13412 base_reg_disp::reg (void) const
13414 return reg_;
13417 inline disp_t
13418 base_reg_disp::disp (void) const
13420 return disp_;
13423 /* Find the base register and calculate the displacement for a given
13424 address rtx 'x'.
13425 This is done by walking the insn list backwards and following SET insns
13426 that set the value of the specified reg 'x'. */
13427 static base_reg_disp
13428 sh_find_base_reg_disp (rtx insn, rtx x, disp_t disp = 0, rtx base_reg = NULL)
13430 if (REG_P (x))
13432 if (REGNO (x) == GBR_REG)
13433 return base_reg_disp (x, disp);
13435 /* We've reached a hard-reg. This is probably the point where
13436 function args are copied to pseudos. Do not go any further and
13437 stick to the pseudo. If the original mem addr was in a hard reg
13438 from the beginning, it will become the base reg. */
13439 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
13440 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
13442 /* Try to find the previous insn that sets the reg. */
13443 for (rtx i = prev_nonnote_insn (insn); i != NULL;
13444 i = prev_nonnote_insn (i))
13446 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG)
13447 && CALL_P (i))
13448 break;
13450 if (!NONJUMP_INSN_P (i))
13451 continue;
13453 rtx p = PATTERN (i);
13454 if (p != NULL && GET_CODE (p) == SET && REG_P (XEXP (p, 0))
13455 && REGNO (XEXP (p, 0)) == REGNO (x))
13457 /* If the recursion can't find out any more details about the
13458 source of the set, then this reg becomes our new base reg. */
13459 return sh_find_base_reg_disp (i, XEXP (p, 1), disp, XEXP (p, 0));
13463 /* If we get here, no previous insn that sets the reg was found.
13464 The input reg is already the base reg. */
13465 return base_reg_disp (x, disp);
13468 else if (GET_CODE (x) == PLUS)
13470 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
13471 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
13473 /* Either left or right val must be a reg.
13474 We don't handle the case of 'reg + reg' here. */
13475 if (left_val.is_reg () && right_val.is_disp ())
13476 return base_reg_disp (left_val.reg (), left_val.disp ()
13477 + right_val.disp () + disp);
13478 else if (right_val.is_reg () && left_val.is_disp ())
13479 return base_reg_disp (right_val.reg (), right_val.disp ()
13480 + left_val.disp () + disp);
13481 else
13482 return base_reg_disp (base_reg, disp);
13485 else if (CONST_INT_P (x))
13486 return base_reg_disp (NULL, disp + INTVAL (x));
13488 /* Didn't find anything useful. */
13489 return base_reg_disp (base_reg, disp);
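/* Worked example (editor's illustration; pseudo register numbers are
   hypothetical and the RTL is schematic): given the preceding insns

     (set (reg:SI 200) (reg:SI GBR_REG))
     (set (reg:SI 201) (plus:SI (reg:SI 200) (const_int 40)))

   a query for the address (plus:SI (reg:SI 201) (const_int 4)) follows
   both sets and returns base register = GBR, displacement = 44.  */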
13492 /* Given an insn and a memory operand, try to find an equivalent GBR
13493 based memory address and return the corresponding new memory address.
13494 Return NULL_RTX if not found. */
13496 sh_find_equiv_gbr_addr (rtx insn, rtx mem)
13498 if (!MEM_P (mem))
13499 return NULL_RTX;
13501 /* Leave post/pre inc/dec or any other side effect addresses alone. */
13502 if (side_effects_p (XEXP (mem, 0)))
13503 return NULL_RTX;
13505 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
13507 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
13509 rtx disp = GEN_INT (gbr_disp.disp ());
13510 if (gbr_displacement (disp, GET_MODE (mem)))
13511 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
13514 return NULL_RTX;
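/* Usage sketch (editor's illustration; CURR_INSN and MEM are assumed to
   come from the caller): a caller wanting to rewrite the access could do

     rtx gbr_addr = sh_find_equiv_gbr_addr (curr_insn, mem);
     if (gbr_addr != NULL_RTX)
       mem = replace_equiv_address (mem, gbr_addr);

   replace_equiv_address keeps the MEM attributes while substituting the
   (plus (reg GBR) (const_int d)) address built above.  */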
13517 /*------------------------------------------------------------------------------
13518 Manual insn combine support code.
13521 /* Given a reg rtx and a start insn, try to find the insn that sets the
13522 specified reg by using the specified insn stepping function, such as
13523 'prev_nonnote_insn_bb'. When the insn is found, try to extract the rtx
13524 of the reg set. */
13525 set_of_reg
13526 sh_find_set_of_reg (rtx reg, rtx insn, rtx_insn *(*stepfunc)(rtx))
13528 set_of_reg result;
13529 result.insn = insn;
13530 result.set_rtx = NULL_RTX;
13531 result.set_src = NULL_RTX;
13533 if (!REG_P (reg) || insn == NULL_RTX)
13534 return result;
13536 for (result.insn = stepfunc (insn); result.insn != NULL_RTX;
13537 result.insn = stepfunc (result.insn))
13539 if (BARRIER_P (result.insn))
13540 return result;
13541 if (!NONJUMP_INSN_P (result.insn))
13542 continue;
13543 if (reg_set_p (reg, result.insn))
13545 result.set_rtx = set_of (reg, result.insn);
13547 if (result.set_rtx == NULL_RTX || GET_CODE (result.set_rtx) != SET)
13548 return result;
13550 result.set_src = XEXP (result.set_rtx, 1);
13551 return result;
13555 return result;
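/* Usage sketch (editor's illustration): the typical pattern, as also
   used by the functions below, is

     set_of_reg s = sh_find_set_of_reg (reg, insn, prev_nonnote_insn_bb);
     if (s.set_src != NULL_RTX)
       ...

   SET_SRC is filled in only when a single SET of the register is found
   before a barrier or the end of the backward walk; otherwise the
   caller only gets the insn where the search stopped.  */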
13558 /* Given an op rtx and an insn, try to find out whether the result of the
13559 specified op consists only of logical operations on T bit stores. */
13560 bool
13561 sh_is_logical_t_store_expr (rtx op, rtx insn)
13563 if (!logical_operator (op, SImode))
13564 return false;
13566 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
13567 int op_is_t_count = 0;
13569 for (int i = 0; i < 2; ++i)
13571 if (t_reg_operand (ops[i], VOIDmode)
13572 || negt_reg_operand (ops[i], VOIDmode))
13573 op_is_t_count++;
13575 else
13577 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
13578 prev_nonnote_insn_bb);
13579 if (op_set.set_src == NULL_RTX)
13580 continue;
13582 if (t_reg_operand (op_set.set_src, VOIDmode)
13583 || negt_reg_operand (op_set.set_src, VOIDmode)
13584 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
13585 op_is_t_count++;
13589 return op_is_t_count == 2;
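/* Worked example (editor's illustration; register numbers are
   hypothetical): for an insn stream such as

     (set (reg:SI 300) (reg:SI T_REG))
     (set (reg:SI 301) (reg:SI T_REG))
     ...
     (set (reg:SI 302) (and:SI (reg:SI 300) (reg:SI 301)))

   a query on the (and:SI ...) expression returns true, because both
   operands are traced back (via sh_find_set_of_reg) to T bit stores.  */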
13592 /* Given the operand that is extended in a sign/zero extend insn, and the
13593 insn, try to figure out whether the sign/zero extension can be replaced
13594 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
13595 NULL_RTX otherwise. */
13597 sh_try_omit_signzero_extend (rtx extended_op, rtx insn)
13599 if (REG_P (extended_op))
13600 extended_op = extended_op;
13601 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
13602 extended_op = SUBREG_REG (extended_op);
13603 else
13604 return NULL_RTX;
13606 /* Reg moves must be of the same mode. */
13607 if (GET_MODE (extended_op) != SImode)
13608 return NULL_RTX;
13610 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
13611 if (s.set_src == NULL_RTX)
13612 return NULL_RTX;
13614 if (t_reg_operand (s.set_src, VOIDmode)
13615 || negt_reg_operand (s.set_src, VOIDmode))
13616 return extended_op;
13618 /* If the zero extended reg was formed by a logical operation, check the
13619 operands of the logical operation. If both originated from T bit
13620 stores the zero extension can be eliminated. */
13621 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
13622 return extended_op;
13624 return NULL_RTX;
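/* Worked example (editor's illustration; register numbers and the
   subreg are schematic): if the extended reg was set from the T bit,

     (set (reg:SI 400) (reg:SI T_REG))
     ...
     (set (reg:SI 401) (zero_extend:SI (subreg:QI (reg:SI 400) 0)))

   the value is already 0 or 1, so the extension can be replaced by a
   plain SImode reg-reg copy; this function then returns (reg:SI 400).  */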
13627 static void
13628 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
13629 int prev_mode, HARD_REG_SET regs_live)
13631 if ((TARGET_SH4A_FP || TARGET_SH4_300)
13632 && prev_mode != FP_MODE_NONE && prev_mode != mode)
13634 emit_insn (gen_toggle_pr ());
13635 if (TARGET_FMOVD)
13636 emit_insn (gen_toggle_sz ());
13638 else
13639 fpscr_set_from_mem (mode, regs_live);
13642 static int
13643 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
13645 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
13648 static int
13649 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
13651 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
13652 get_attr_fp_set (insn) != FP_SET_NONE)
13653 return (int) get_attr_fp_set (insn);
13654 else
13655 return mode;
13658 static int
13659 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
13661 return NORMAL_MODE (entity);
13664 static int
13665 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
13667 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
13670 static int
13671 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
13673 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
13676 #include "gt-sh.h"