[official-gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2018 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
24 #define IN_TARGET_CODE 1
26 #include "config.h"
27 #define INCLUDE_VECTOR
28 #include "system.h"
29 #include "coretypes.h"
30 #include "backend.h"
31 #include "target.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "optabs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "flags.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "dwarf2.h"
57 #include "langhooks.h"
58 #include "cfgrtl.h"
59 #include "intl.h"
60 #include "sched-int.h"
61 #include "gimplify.h"
62 #include "tm-constrs.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "builtins.h"
67 #include "rtl-iter.h"
68 #include "regs.h"
70 /* This file should be included last. */
71 #include "target-def.h"
73 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
75 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
76 #define GEN_MOV (*(gen_movsi))
77 #define GEN_ADD3 (*(gen_addsi3))
78 #define GEN_SUB3 (*(gen_subsi3))
80 /* Used to simplify the logic below. Find the attributes wherever
81 they may be. */
82 #define SH_ATTRIBUTES(decl) \
83 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
84 : DECL_ATTRIBUTES (decl) \
85 ? (DECL_ATTRIBUTES (decl)) \
86 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
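/* Illustrative note (not part of the original source): for a
   FUNCTION_DECL fndecl that carries no attributes of its own,
   SH_ATTRIBUTES (fndecl) falls back to the attributes of its type,
   i.e. TYPE_ATTRIBUTES (TREE_TYPE (fndecl)); for a type node it
   reads TYPE_ATTRIBUTES directly.  */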
88 /* Set to true by expand_prologue() when the function is an
89 interrupt handler. */
90 bool current_function_interrupt;
92 tree sh_deferred_function_attributes;
93 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
95 /* Global variables for machine-dependent things. */
97 /* Which cpu are we scheduling for. */
98 enum processor_type sh_cpu;
100 /* Definitions used in ready queue reordering for first scheduling pass. */
102 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
103 static short *regmode_weight[2];
105 /* Total SFmode and SImode weights of scheduled insns. */
106 static int curr_regmode_pressure[2];
108 /* Number of r0 life regions. */
109 static int r0_life_regions;
111 /* If true, skip cycles for Q -> R movement. */
112 static int skip_cycles = 0;
114 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
115 and returned from sh_reorder2. */
116 static short cached_can_issue_more;
118 /* Unique number for UNSPEC_BBR pattern. */
119 static unsigned int unspec_bbr_uid = 1;
121 /* Provides the class number of the smallest class containing
122 reg number. */
123 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
125 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
158 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
159 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
160 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
161 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
162 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
163 GENERAL_REGS, GENERAL_REGS,
166 char sh_register_names[FIRST_PSEUDO_REGISTER] \
167 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
169 char sh_additional_register_names[ADDREGNAMES_SIZE] \
170 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
171 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
173 int assembler_dialect;
175 static void split_branches (rtx_insn *);
176 static int branch_dest (rtx);
177 static void print_slot (rtx_sequence *);
178 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
179 static void dump_table (rtx_insn *, rtx_insn *);
180 static bool broken_move (rtx_insn *);
181 static bool mova_p (rtx_insn *);
182 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
183 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
184 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
185 static void sh_reorg (void);
186 static void sh_option_override (void);
187 static void sh_override_options_after_change (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
189 static rtx_insn* emit_frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
193 static int calc_live_regs (HARD_REG_SET *);
194 static HOST_WIDE_INT rounded_frame_size (int);
195 static bool sh_frame_pointer_required (void);
196 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
197 static int sh_mode_needed (int, rtx_insn *);
198 static int sh_mode_after (int, int, rtx_insn *);
199 static int sh_mode_entry (int);
200 static int sh_mode_exit (int);
201 static int sh_mode_priority (int entity, int n);
203 static rtx mark_constant_pool_use (rtx);
204 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
205 int, bool *);
206 static tree sh_handle_resbank_handler_attribute (tree *, tree,
207 tree, int, bool *);
208 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
209 tree, int, bool *);
210 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
211 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
212 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
213 static void sh_print_operand (FILE *, rtx, int);
214 static void sh_print_operand_address (FILE *, machine_mode, rtx);
215 static bool sh_print_operand_punct_valid_p (unsigned char code);
216 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
217 static void sh_output_function_epilogue (FILE *);
218 static void sh_insert_attributes (tree, tree *);
219 static const char *sh_check_pch_target_flags (int);
220 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
221 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
222 static int sh_issue_rate (void);
223 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
224 static short find_set_regmode_weight (rtx, machine_mode);
225 static short find_insn_regmode_weight (rtx, machine_mode);
226 static void find_regmode_weight (basic_block, machine_mode);
227 static int find_r0_life_regions (basic_block);
228 static void sh_md_init_global (FILE *, int, int);
229 static void sh_md_finish_global (FILE *, int);
230 static int rank_for_reorder (const void *, const void *);
231 static void swap_reorder (rtx_insn **, int);
232 static void ready_reorder (rtx_insn **, int);
233 static bool high_pressure (machine_mode);
234 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
235 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
236 static void sh_md_init (FILE *, int, int);
237 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
239 static bool sh_function_ok_for_sibcall (tree, tree);
241 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
242 static bool sh_ms_bitfield_layout_p (const_tree);
244 static void sh_init_builtins (void);
245 static tree sh_builtin_decl (unsigned, bool);
246 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
247 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
248 HOST_WIDE_INT, tree);
249 static void sh_file_start (void);
250 static bool sh_assemble_integer (rtx, unsigned int, int);
251 static bool flow_dependent_p (rtx, rtx);
252 static void flow_dependent_p_1 (rtx, const_rtx, void *);
253 static int shiftcosts (rtx);
254 static int and_xor_ior_costs (rtx, int);
255 static int addsubcosts (rtx);
256 static int multcosts (rtx);
257 static bool unspec_caller_rtx_p (rtx);
258 static bool sh_cannot_copy_insn_p (rtx_insn *);
259 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
260 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
261 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
262 static int sh_pr_n_sets (void);
263 static rtx sh_allocate_initial_value (rtx);
264 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
265 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
266 machine_mode,
267 struct secondary_reload_info *);
268 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
269 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
270 static rtx sh_delegitimize_address (rtx);
271 static bool sh_cannot_substitute_mem_equiv_p (rtx);
272 static bool sh_legitimize_address_displacement (rtx *, rtx *,
273 poly_int64, machine_mode);
274 static int scavenge_reg (HARD_REG_SET *s);
276 static rtx sh_struct_value_rtx (tree, int);
277 static rtx sh_function_value (const_tree, const_tree, bool);
278 static bool sh_function_value_regno_p (const unsigned int);
279 static rtx sh_libcall_value (machine_mode, const_rtx);
280 static bool sh_return_in_memory (const_tree, const_tree);
281 static rtx sh_builtin_saveregs (void);
282 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
283 tree, int *, int);
284 static bool sh_strict_argument_naming (cumulative_args_t);
285 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
286 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
287 static tree sh_build_builtin_va_list (void);
288 static void sh_va_start (tree, rtx);
289 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
290 static bool sh_promote_prototypes (const_tree);
291 static machine_mode sh_promote_function_mode (const_tree type,
292 machine_mode,
293 int *punsignedp,
294 const_tree funtype,
295 int for_return);
296 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
297 const_tree, bool);
298 static bool sh_callee_copies (cumulative_args_t, machine_mode,
299 const_tree, bool);
300 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
301 tree, bool);
302 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
303 const_tree, bool);
304 static rtx sh_function_arg (cumulative_args_t, machine_mode,
305 const_tree, bool);
306 static int sh_dwarf_calling_convention (const_tree);
307 static void sh_encode_section_info (tree, rtx, int);
308 static bool sh2a_function_vector_p (tree);
309 static void sh_trampoline_init (rtx, tree, rtx);
310 static rtx sh_trampoline_adjust_address (rtx);
311 static void sh_conditional_register_usage (void);
312 static bool sh_legitimate_constant_p (machine_mode, rtx);
313 static int mov_insn_size (machine_mode, bool);
314 static int mov_insn_alignment_mask (machine_mode, bool);
315 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
316 unsigned int,
317 enum by_pieces_operation,
318 bool);
319 static bool sequence_insn_p (rtx_insn *);
320 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
321 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
322 machine_mode, bool);
323 static bool sh_legitimate_combined_insn (rtx_insn* insn);
325 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
327 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
328 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
329 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
330 static bool sh_modes_tieable_p (machine_mode, machine_mode);
331 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
333 static const struct attribute_spec sh_attribute_table[] =
335 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
336 affects_type_identity, handler, exclude } */
337 { "interrupt_handler", 0, 0, true, false, false, false,
338 sh_handle_interrupt_handler_attribute, NULL },
339 { "sp_switch", 1, 1, true, false, false, false,
340 sh_handle_sp_switch_attribute, NULL },
341 { "trap_exit", 1, 1, true, false, false, false,
342 sh_handle_trap_exit_attribute, NULL },
343 { "renesas", 0, 0, false, true, false, false,
344 sh_handle_renesas_attribute, NULL },
345 { "trapa_handler", 0, 0, true, false, false, false,
346 sh_handle_interrupt_handler_attribute, NULL },
347 { "nosave_low_regs", 0, 0, true, false, false, false,
348 sh_handle_interrupt_handler_attribute, NULL },
349 { "resbank", 0, 0, true, false, false, false,
350 sh_handle_resbank_handler_attribute, NULL },
351 { "function_vector", 1, 1, true, false, false, false,
352 sh2a_handle_function_vector_handler_attribute, NULL },
353 { NULL, 0, 0, false, false, false, false, NULL, NULL }
356 /* Initialize the GCC target structure. */
357 #undef TARGET_ATTRIBUTE_TABLE
358 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
360 /* The next two are used for debug info when compiling with -gdwarf. */
361 #undef TARGET_ASM_UNALIGNED_HI_OP
362 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
363 #undef TARGET_ASM_UNALIGNED_SI_OP
364 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
366 #undef TARGET_OPTION_OVERRIDE
367 #define TARGET_OPTION_OVERRIDE sh_option_override
369 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
370 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
371 sh_override_options_after_change
373 #undef TARGET_PRINT_OPERAND
374 #define TARGET_PRINT_OPERAND sh_print_operand
375 #undef TARGET_PRINT_OPERAND_ADDRESS
376 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
377 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
378 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
379 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
380 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
382 #undef TARGET_ASM_FUNCTION_EPILOGUE
383 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
385 #undef TARGET_ASM_OUTPUT_MI_THUNK
386 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
388 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
389 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
390 hook_bool_const_tree_hwi_hwi_const_tree_true
392 #undef TARGET_ASM_FILE_START
393 #define TARGET_ASM_FILE_START sh_file_start
394 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
395 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
397 #undef TARGET_ASM_INTEGER
398 #define TARGET_ASM_INTEGER sh_assemble_integer
400 #undef TARGET_REGISTER_MOVE_COST
401 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
403 #undef TARGET_INSERT_ATTRIBUTES
404 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
406 #undef TARGET_SCHED_ADJUST_COST
407 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
409 #undef TARGET_SCHED_ISSUE_RATE
410 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
412 /* The next 5 hooks have been implemented for reenabling sched1. With the
413 help of these macros we are limiting the movement of insns in sched1 to
414 reduce the register pressure. The overall idea is to keep count of SImode
415 and SFmode regs required by already scheduled insns. When these counts
416 cross some threshold values, give priority to insns that free registers.
417 The insn that frees registers is most likely to be the insn with the lowest
418 LUID (original insn order); but such an insn might be sitting in the stalled
419 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
420 up to a max of 8 cycles so that such insns may move from Q -> R.
422 The descriptions of the hooks are as follows:
424 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
425 scheduler; it is called inside the sched_init function just after the
426 find_insn_reg_weights function call. It is used to calculate the SImode
427 and SFmode weights of insns of basic blocks, much like what
428 find_insn_reg_weights does.
429 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
431 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
432 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
433 (Q)->(R).
435 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
436 high; reorder the ready queue so that the insn with lowest LUID will be
437 issued next.
439 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
440 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
442 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
443 can be returned from TARGET_SCHED_REORDER2.
445 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
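/* A minimal sketch of the reordering criterion described above (this
   snippet is not part of the original file; it merely restates the
   idea using the helpers declared earlier, where "ready" and
   "n_ready" stand for the scheduler's ready queue and its length):

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   The actual logic lives in sh_reorder / sh_reorder2 below.  */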
447 #undef TARGET_SCHED_DFA_NEW_CYCLE
448 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
450 #undef TARGET_SCHED_INIT_GLOBAL
451 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
453 #undef TARGET_SCHED_FINISH_GLOBAL
454 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
456 #undef TARGET_SCHED_VARIABLE_ISSUE
457 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
459 #undef TARGET_SCHED_REORDER
460 #define TARGET_SCHED_REORDER sh_reorder
462 #undef TARGET_SCHED_REORDER2
463 #define TARGET_SCHED_REORDER2 sh_reorder2
465 #undef TARGET_SCHED_INIT
466 #define TARGET_SCHED_INIT sh_md_init
468 #undef TARGET_DELEGITIMIZE_ADDRESS
469 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
471 #undef TARGET_LEGITIMIZE_ADDRESS
472 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
474 #undef TARGET_CAN_FOLLOW_JUMP
475 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
477 #undef TARGET_MS_BITFIELD_LAYOUT_P
478 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
480 #undef TARGET_INIT_BUILTINS
481 #define TARGET_INIT_BUILTINS sh_init_builtins
482 #undef TARGET_BUILTIN_DECL
483 #define TARGET_BUILTIN_DECL sh_builtin_decl
484 #undef TARGET_EXPAND_BUILTIN
485 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
488 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
490 #undef TARGET_CANNOT_COPY_INSN_P
491 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
492 #undef TARGET_RTX_COSTS
493 #define TARGET_RTX_COSTS sh_rtx_costs
494 #undef TARGET_ADDRESS_COST
495 #define TARGET_ADDRESS_COST sh_address_cost
496 #undef TARGET_ALLOCATE_INITIAL_VALUE
497 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
499 #undef TARGET_MACHINE_DEPENDENT_REORG
500 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
502 #undef TARGET_DWARF_REGISTER_SPAN
503 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
505 #ifdef HAVE_AS_TLS
506 #undef TARGET_HAVE_TLS
507 #define TARGET_HAVE_TLS true
508 #endif
510 #undef TARGET_PROMOTE_PROTOTYPES
511 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
512 #undef TARGET_PROMOTE_FUNCTION_MODE
513 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
515 #undef TARGET_FUNCTION_VALUE
516 #define TARGET_FUNCTION_VALUE sh_function_value
517 #undef TARGET_FUNCTION_VALUE_REGNO_P
518 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
519 #undef TARGET_LIBCALL_VALUE
520 #define TARGET_LIBCALL_VALUE sh_libcall_value
521 #undef TARGET_STRUCT_VALUE_RTX
522 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
523 #undef TARGET_RETURN_IN_MEMORY
524 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
526 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
527 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
528 #undef TARGET_SETUP_INCOMING_VARARGS
529 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
530 #undef TARGET_STRICT_ARGUMENT_NAMING
531 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
532 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
533 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
534 #undef TARGET_MUST_PASS_IN_STACK
535 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
536 #undef TARGET_PASS_BY_REFERENCE
537 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
538 #undef TARGET_CALLEE_COPIES
539 #define TARGET_CALLEE_COPIES sh_callee_copies
540 #undef TARGET_ARG_PARTIAL_BYTES
541 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
542 #undef TARGET_FUNCTION_ARG
543 #define TARGET_FUNCTION_ARG sh_function_arg
544 #undef TARGET_FUNCTION_ARG_ADVANCE
545 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
547 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
548 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
550 #undef TARGET_BUILD_BUILTIN_VA_LIST
551 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
552 #undef TARGET_EXPAND_BUILTIN_VA_START
553 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
554 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
555 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
557 #undef TARGET_VECTOR_MODE_SUPPORTED_P
558 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
560 #undef TARGET_CHECK_PCH_TARGET_FLAGS
561 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
563 #undef TARGET_DWARF_CALLING_CONVENTION
564 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
566 #undef TARGET_FRAME_POINTER_REQUIRED
567 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
569 #undef TARGET_MODE_EMIT
570 #define TARGET_MODE_EMIT sh_emit_mode_set
572 #undef TARGET_MODE_NEEDED
573 #define TARGET_MODE_NEEDED sh_mode_needed
575 #undef TARGET_MODE_AFTER
576 #define TARGET_MODE_AFTER sh_mode_after
578 #undef TARGET_MODE_ENTRY
579 #define TARGET_MODE_ENTRY sh_mode_entry
581 #undef TARGET_MODE_EXIT
582 #define TARGET_MODE_EXIT sh_mode_exit
584 #undef TARGET_MODE_PRIORITY
585 #define TARGET_MODE_PRIORITY sh_mode_priority
587 /* Return regmode weight for insn. */
588 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
589 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
591 /* Return current register pressure for regmode. */
592 #define CURR_REGMODE_PRESSURE(MODE)\
593 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
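/* Illustrative example (not in the original source): for an insn
   "insn", INSN_REGMODE_WEIGHT (insn, SImode) expands to
   regmode_weight[0][INSN_UID (insn)], while any other mode (i.e.
   SFmode here) selects index 1.  Likewise
   CURR_REGMODE_PRESSURE (SImode) is curr_regmode_pressure[0].  */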
595 #undef TARGET_ENCODE_SECTION_INFO
596 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
598 #undef TARGET_LRA_P
599 #define TARGET_LRA_P sh_lra_p
601 #undef TARGET_SECONDARY_RELOAD
602 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
604 #undef TARGET_PREFERRED_RELOAD_CLASS
605 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
607 #undef TARGET_CONDITIONAL_REGISTER_USAGE
608 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
610 #undef TARGET_LEGITIMATE_ADDRESS_P
611 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
613 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
614 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
616 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
617 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
618 sh_legitimize_address_displacement
620 #undef TARGET_TRAMPOLINE_INIT
621 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
622 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
623 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
625 #undef TARGET_LEGITIMATE_CONSTANT_P
626 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
628 #undef TARGET_CANONICALIZE_COMPARISON
629 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
631 #undef TARGET_LEGITIMATE_COMBINED_INSN
632 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
634 #undef TARGET_FIXED_CONDITION_CODE_REGS
635 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
637 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
638 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
639 sh_use_by_pieces_infrastructure_p
641 /* Machine-specific symbol_ref flags. */
642 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
644 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
645 is used by optabs.c atomic op expansion code as well as in sync.md. */
646 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
647 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
649 #undef TARGET_CANNOT_FORCE_CONST_MEM
650 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
652 #undef TARGET_HARD_REGNO_NREGS
653 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
654 #undef TARGET_HARD_REGNO_MODE_OK
655 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
657 #undef TARGET_MODES_TIEABLE_P
658 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
660 #undef TARGET_CAN_CHANGE_MODE_CLASS
661 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
663 #undef TARGET_CONSTANT_ALIGNMENT
664 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
666 struct gcc_target targetm = TARGET_INITIALIZER;
669 /* Information on the currently selected atomic model.
670 This is initialized in sh_option_override. */
671 static sh_atomic_model selected_atomic_model_;
673 const sh_atomic_model&
674 selected_atomic_model (void)
676 return selected_atomic_model_;
679 static sh_atomic_model
680 parse_validate_atomic_model_option (const char* str)
682 const char* model_names[sh_atomic_model::num_models];
683 model_names[sh_atomic_model::none] = "none";
684 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
685 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
686 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
687 model_names[sh_atomic_model::soft_imask] = "soft-imask";
689 const char* model_cdef_names[sh_atomic_model::num_models];
690 model_cdef_names[sh_atomic_model::none] = "NONE";
691 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
692 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
693 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
694 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
696 sh_atomic_model ret;
697 ret.type = sh_atomic_model::none;
698 ret.name = model_names[sh_atomic_model::none];
699 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
700 ret.strict = false;
701 ret.tcb_gbr_offset = -1;
703 /* Handle empty string as 'none'. */
704 if (str == NULL || *str == '\0')
705 return ret;
707 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
709 std::vector<std::string> tokens;
710 for (std::stringstream ss (str); ss.good (); )
712 tokens.push_back (std::string ());
713 std::getline (ss, tokens.back (), ',');
716 if (tokens.empty ())
717 err_ret ("invalid atomic model option");
719 /* The first token must be the atomic model name. */
721 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
722 if (tokens.front () == model_names[i])
724 ret.type = (sh_atomic_model::enum_type)i;
725 ret.name = model_names[i];
726 ret.cdef_name = model_cdef_names[i];
727 goto got_mode_name;
730 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
731 got_mode_name:;
734 /* Go through the remaining tokens. */
735 for (size_t i = 1; i < tokens.size (); ++i)
737 if (tokens[i] == "strict")
738 ret.strict = true;
739 else if (tokens[i].find ("gbr-offset=") == 0)
741 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
742 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
743 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
744 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
745 "option", offset_str.c_str ());
747 else
748 err_ret ("unknown parameter \"%s\" in atomic model option",
749 tokens[i].c_str ());
752 /* Check that the selection makes sense. */
753 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
754 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
755 ret.name);
757 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
758 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
760 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
761 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
763 if (ret.type == sh_atomic_model::soft_tcb
764 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
765 || (ret.tcb_gbr_offset & 3) != 0))
766 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
767 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
768 ret.name);
770 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
771 err_ret ("cannot use atomic model %s in user mode", ret.name);
773 return ret;
775 #undef err_ret
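/* Example (illustrative, not from the original file): an option such
   as -matomic-model=soft-tcb,gbr-offset=16,strict is tokenized by
   the parser above into { "soft-tcb", "gbr-offset=16", "strict" },
   yielding type == sh_atomic_model::soft_tcb, tcb_gbr_offset == 16
   and strict == true.  Treat the exact option spelling here as an
   example of the accepted syntax rather than a quote from the GCC
   manual.  */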
778 /* Register SH specific RTL passes. */
779 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
780 const char* name);
781 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
782 const char* name);
783 static void
784 register_sh_passes (void)
786 /* Running the sh_treg_combine pass after ce1 generates better code when
787 comparisons are combined and reg-reg moves are introduced, because
788 reg-reg moves will be eliminated afterwards. However, there are quite
789 a few cases where combine will be unable to fold comparison-related insns,
790 so for now don't do it.
791 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
792 PASS_POS_INSERT_AFTER, "ce1", 1);
795 /* Run sh_treg_combine pass after combine but before register allocation. */
796 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
797 PASS_POS_INSERT_AFTER, "split1", 1);
799 /* Run sh_treg_combine pass after register allocation and basic block
800 reordering as this sometimes creates new opportunities. */
801 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
802 PASS_POS_INSERT_AFTER, "split4", 1);
804 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
805 is known after a conditional branch.
806 This must be done after basic blocks and branch conditions have
807 stabilized and won't be changed by further passes. */
808 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
809 PASS_POS_INSERT_BEFORE, "sched2", 1);
812 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
813 various options, and do some machine dependent initialization. */
814 static void
815 sh_option_override (void)
817 int regno;
819 SUBTARGET_OVERRIDE_OPTIONS;
821 sh_cpu = PROCESSOR_SH1;
822 assembler_dialect = 0;
823 if (TARGET_SH2)
824 sh_cpu = PROCESSOR_SH2;
825 if (TARGET_SH2E)
826 sh_cpu = PROCESSOR_SH2E;
827 if (TARGET_SH2A)
828 sh_cpu = PROCESSOR_SH2A;
829 if (TARGET_SH3)
830 sh_cpu = PROCESSOR_SH3;
831 if (TARGET_SH3E)
832 sh_cpu = PROCESSOR_SH3E;
833 if (TARGET_SH4)
835 assembler_dialect = 1;
836 sh_cpu = PROCESSOR_SH4;
838 if (TARGET_SH4A)
840 assembler_dialect = 1;
841 sh_cpu = PROCESSOR_SH4A;
844 /* User/privileged mode is supported only on SH3* and SH4*.
845 Disable it for everything else. */
846 if (!TARGET_SH3 && TARGET_USERMODE)
847 TARGET_USERMODE = false;
849 if (! strcmp (sh_div_str, "call-div1"))
850 sh_div_strategy = SH_DIV_CALL_DIV1;
851 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
852 sh_div_strategy = SH_DIV_CALL_FP;
853 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
854 sh_div_strategy = SH_DIV_CALL_TABLE;
855 else
857 /* Pick one that makes the most sense for the target in general.
858 It is not very useful to use different functions depending on -Os,
859 since then we'll end up with two different functions when some of
860 the code is compiled for size, and some for speed.
862 /* SH4 tends to emphasize speed. */
863 if (TARGET_HARD_SH4)
864 sh_div_strategy = SH_DIV_CALL_TABLE;
865 /* These have their own way of doing things. */
866 else if (TARGET_SH2A)
867 sh_div_strategy = SH_DIV_INTRINSIC;
868 /* SH1 .. SH3 cores often go into small-footprint systems, so
869 default to the smallest implementation available. */
870 else
871 sh_div_strategy = SH_DIV_CALL_DIV1;
874 if (sh_divsi3_libfunc[0])
875 ; /* User supplied - leave it alone. */
876 else if (TARGET_DIVIDE_CALL_FP)
877 sh_divsi3_libfunc = "__sdivsi3_i4";
878 else if (TARGET_DIVIDE_CALL_TABLE)
879 sh_divsi3_libfunc = "__sdivsi3_i4i";
880 else
881 sh_divsi3_libfunc = "__sdivsi3";
883 if (sh_branch_cost == -1)
885 /* The SH1 does not have delay slots, hence we get a pipeline stall
886 at every branch. The SH4 is superscalar, so the single delay slot
887 is not sufficient to keep both pipelines filled.
888 In any case, set the default branch cost to '2', as it results in
889 slightly smaller code overall and also enables some if-conversions
890 that are required for matching special T-bit related insns. */
891 sh_branch_cost = 2;
894 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
895 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
896 TARGET_ZDCBRANCH = 1;
898 /* FDPIC code is a special form of PIC, and the vast majority of code
899 generation constraints that apply to PIC also apply to FDPIC, so we
900 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
901 flag_pic is checked. */
902 if (TARGET_FDPIC && !flag_pic)
903 flag_pic = 2;
905 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
906 if (! VALID_REGISTER_P (regno))
907 sh_register_names[regno][0] = '\0';
909 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
910 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
911 sh_additional_register_names[regno][0] = '\0';
913 if (flag_pic && ! TARGET_PREFERGOT)
914 flag_no_function_cse = 1;
916 if (targetm.small_register_classes_for_mode_p (VOIDmode))
918 /* Never run scheduling before reload, since that can
919 break global alloc, and generates slower code anyway due
920 to the pressure on R0. */
921 /* Enable sched1 for SH4 if the user explicitly requests it.
922 When sched1 is enabled, the ready queue will be reordered by
923 the target hooks if pressure is high. We cannot do this for
924 PIC, SH3 and lower as they give spill failures for R0. */
925 if (!TARGET_HARD_SH4 || flag_pic)
926 flag_schedule_insns = 0;
927 /* ??? Current exception handling places basic block boundaries
928 after call_insns. This causes high pressure on R0 and gives
929 spill failures for R0 in reload. See PR 22553 and the thread
930 on gcc-patches
931 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
932 else if (flag_exceptions)
934 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
935 warning (0, "ignoring -fschedule-insns because of exception "
936 "handling bug");
937 flag_schedule_insns = 0;
939 else if (flag_schedule_insns
940 && !global_options_set.x_flag_schedule_insns)
941 flag_schedule_insns = 0;
944 /* Unwind info is not correct around the CFG unless either a frame
945 pointer is present or M_A_O_A is set. Fixing this requires rewriting
946 unwind info generation to be aware of the CFG and propagating states
947 around edges. */
948 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
949 || flag_exceptions || flag_non_call_exceptions)
950 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
952 warning (0, "unwind tables currently require either a frame pointer "
953 "or -maccumulate-outgoing-args for correctness");
954 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
957 if (flag_unsafe_math_optimizations)
959 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
960 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
961 TARGET_FSCA = 1;
963 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
964 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
965 TARGET_FSRRA = 1;
968 /* Allow the fsrra insn only if -funsafe-math-optimizations and
969 -ffinite-math-only are enabled. */
970 TARGET_FSRRA = TARGET_FSRRA
971 && flag_unsafe_math_optimizations
972 && flag_finite_math_only;
974 /* If the -mieee option was not explicitly set by the user, turn it on
975 unless -ffinite-math-only was specified. See also PR 33135. */
976 if (! global_options_set.x_TARGET_IEEE)
977 TARGET_IEEE = ! flag_finite_math_only;
979 if (sh_fixed_range_str)
980 sh_fix_range (sh_fixed_range_str);
982 /* This target defaults to strict volatile bitfields. */
983 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
984 flag_strict_volatile_bitfields = 1;
986 sh_override_options_after_change ();
988 /* Parse atomic model option and make sure it is valid for the current
989 target CPU. */
990 selected_atomic_model_
991 = parse_validate_atomic_model_option (sh_atomic_model_str);
993 register_sh_passes ();
996 /* Implement targetm.override_options_after_change. */
998 static void
999 sh_override_options_after_change (void)
1001 /* Adjust loop, jump and function alignment values (in bytes), if those
1002 were not specified by the user using -falign-loops, -falign-jumps
1003 and -falign-functions options.
1004 32 bit alignment is better for speed, because instructions can be
1005 fetched as a pair from a longword boundary. For size use 16 bit
1006 alignment to get more compact code.
1007 Aligning all jumps increases the code size, even if it might
1008 result in slightly faster code. Thus, it is set to the smallest
1009 alignment possible if not specified by the user. */
1010 if (align_loops == 0)
1011 align_loops = optimize_size ? 2 : 4;
1013 if (align_jumps == 0)
1014 align_jumps = 2;
1015 else if (align_jumps < 2)
1016 align_jumps = 2;
1018 if (align_functions == 0)
1019 align_functions = optimize_size ? 2 : 4;
1021 /* The linker relaxation code breaks when a function contains
1022 alignments that are larger than the alignment at the start of a
1023 compilation unit.
1024 if (TARGET_RELAX)
1026 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1028 /* Also take possible .long constants / mova tables into account. */
1029 if (min_align < 4)
1030 min_align = 4;
1031 if (align_functions < min_align)
1032 align_functions = min_align;
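/* Worked example (illustrative, not part of the original source),
   assuming none of the alignment options were given on the command
   line: with -Os the defaults above are align_loops = 2,
   align_jumps = 2 and align_functions = 2, so under -mrelax
   min_align starts at 2, is raised to 4 to account for .long
   constants / mova tables, and align_functions is bumped from 2
   to 4.  */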
1036 /* Print the operand address in x to the stream. */
1037 static void
1038 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1040 switch (GET_CODE (x))
1042 case REG:
1043 case SUBREG:
1044 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1045 break;
1047 case PLUS:
1049 rtx base = XEXP (x, 0);
1050 rtx index = XEXP (x, 1);
1052 switch (GET_CODE (index))
1054 case CONST_INT:
1055 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1056 reg_names[true_regnum (base)]);
1057 break;
1059 case REG:
1060 case SUBREG:
1062 int base_num = true_regnum (base);
1063 int index_num = true_regnum (index);
1065 /* If base or index is R0, make sure that it comes first.
1066 Usually one of them will be R0, but the order might be wrong.
1067 If neither base nor index is R0, it's an error and we just
1068 pass it on to the assembler. This avoids silent wrong-code
1069 bugs. */
1070 if (base_num == 0 && index_num != 0)
1071 std::swap (base_num, index_num);
1073 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1074 reg_names[base_num]);
1075 break;
1078 default:
1079 gcc_unreachable ();
1082 break;
1084 case PRE_DEC:
1085 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1086 break;
1088 case POST_INC:
1089 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1090 break;
1092 default:
1093 x = mark_constant_pool_use (x);
1094 output_addr_const (stream, x);
1095 break;
1099 /* Print operand x (an rtx) in assembler syntax to file stream
1100 according to modifier code.
1102 '.' print a .s if insn needs delay slot
1103 ',' print LOCAL_LABEL_PREFIX
1104 '@' print trapa, rte or rts depending on the function's interrupt-handler attributes
1105 '#' output a nop if there is nothing to put in the delay slot
1106 ''' print likelihood suffix (/u for unlikely).
1107 '>' print branch target if -fverbose-asm
1108 'O' print a constant without the #
1109 'R' print the LSW of a dp value - changes if in little endian
1110 'S' print the MSW of a dp value - changes if in little endian
1111 'T' print the next word of a dp value - same as 'R' in big endian mode.
1112 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1113 'N' print 'r63' if the operand is (const_int 0).
1114 'd' print a V2SF reg as dN instead of fpN.
1115 'm' print a pair `base,offset' or `base,index', for LD and ST.
1116 'U' Likewise for {LD,ST}{HI,LO}.
1117 'V' print the position of a single bit set.
1118 'W' print the position of a single bit cleared.
1119 't' print a memory address which is a register.
1120 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1121 'o' output an operator. */
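/* Illustrative only (these sample templates are made up and are not
   quoted from sh.md): in an insn output template a modifier is
   written as '%' plus the code, so "%M1" appends the .b/.w/.l/.s/.d
   width suffix taken from operand 1's mode, and "%N0" prints "r63"
   when operand 0 is (const_int 0).  */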
1122 static void
1123 sh_print_operand (FILE *stream, rtx x, int code)
1125 int regno;
1126 machine_mode mode;
1128 switch (code)
1130 tree trapa_attr;
1132 case '.':
1133 if (final_sequence
1134 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1135 && get_attr_length (final_sequence->insn (1)))
1136 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1137 break;
1138 case ',':
1139 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1140 break;
1141 case '@':
1142 trapa_attr = lookup_attribute ("trap_exit",
1143 DECL_ATTRIBUTES (current_function_decl));
1144 if (trapa_attr)
1145 fprintf (stream, "trapa #%ld",
1146 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1147 else if (sh_cfun_interrupt_handler_p ())
1149 if (sh_cfun_resbank_handler_p ())
1150 fprintf (stream, "resbank\n");
1151 fprintf (stream, "rte");
1153 else
1154 fprintf (stream, "rts");
1155 break;
1156 case '#':
1157 /* Output a nop if there's nothing in the delay slot. */
1158 if (dbr_sequence_length () == 0)
1159 fprintf (stream, "\n\tnop");
1160 break;
1161 case '\'':
1163 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1165 if (note
1166 && profile_probability::from_reg_br_prob_note (XINT (note, 0))
1167 < profile_probability::even ())
1168 fputs ("/u", stream);
1169 break;
1171 case '>':
1172 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1174 fputs ("\t! target: ", stream);
1175 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1177 break;
1178 case 'O':
1179 x = mark_constant_pool_use (x);
1180 output_addr_const (stream, x);
1181 break;
1182 /* N.B.: %R / %S / %T adjust memory addresses by four.
1183 While they can be used to access 64 bit parts of a larger value
1184 held in general purpose registers, that won't work with memory,
1185 nor with fp registers, since the frxx names are used. */
1186 case 'R':
1187 if (REG_P (x) || GET_CODE (x) == SUBREG)
1189 regno = true_regnum (x);
1190 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1191 fputs (reg_names[regno], (stream));
1193 else if (MEM_P (x))
1195 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1196 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1198 else
1200 rtx sub = NULL_RTX;
1202 mode = GET_MODE (x);
1203 if (mode == VOIDmode)
1204 mode = DImode;
1205 if (GET_MODE_SIZE (mode) >= 8)
1206 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1207 if (sub)
1208 sh_print_operand (stream, sub, 0);
1209 else
1210 output_operand_lossage ("invalid operand to %%R");
1212 break;
1213 case 'S':
1214 if (REG_P (x) || GET_CODE (x) == SUBREG)
1216 regno = true_regnum (x);
1217 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1218 fputs (reg_names[regno], (stream));
1220 else if (MEM_P (x))
1222 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1223 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1225 else
1227 rtx sub = NULL_RTX;
1229 mode = GET_MODE (x);
1230 if (mode == VOIDmode)
1231 mode = DImode;
1232 if (GET_MODE_SIZE (mode) >= 8)
1233 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1234 if (sub)
1235 sh_print_operand (stream, sub, 0);
1236 else
1237 output_operand_lossage ("invalid operand to %%S");
1239 break;
1240 case 'T':
1241 /* Next word of a double. */
1242 switch (GET_CODE (x))
1244 case REG:
1245 fputs (reg_names[REGNO (x) + 1], (stream));
1246 break;
1247 case MEM:
1249 machine_mode mode = GET_MODE (x);
1250 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1251 && GET_CODE (XEXP (x, 0)) != POST_INC)
1252 x = adjust_address (x, SImode, 4);
1253 sh_print_operand_address (stream, mode, XEXP (x, 0));
1255 break;
1256 default:
1257 break;
1259 break;
1261 case 't':
1262 gcc_assert (MEM_P (x));
1263 x = XEXP (x, 0);
1264 switch (GET_CODE (x))
1266 case REG:
1267 case SUBREG:
1268 sh_print_operand (stream, x, 0);
1269 break;
1270 default:
1271 break;
1273 break;
1275 case 'o':
1276 switch (GET_CODE (x))
1278 case PLUS: fputs ("add", stream); break;
1279 case MINUS: fputs ("sub", stream); break;
1280 case MULT: fputs ("mul", stream); break;
1281 case DIV: fputs ("div", stream); break;
1282 case EQ: fputs ("eq", stream); break;
1283 case NE: fputs ("ne", stream); break;
1284 case GT: case LT: fputs ("gt", stream); break;
1285 case GE: case LE: fputs ("ge", stream); break;
1286 case GTU: case LTU: fputs ("gtu", stream); break;
1287 case GEU: case LEU: fputs ("geu", stream); break;
1288 default:
1289 break;
1291 break;
1292 case 'M':
1293 if (MEM_P (x))
1295 switch (GET_MODE (x))
1297 case E_QImode: fputs (".b", stream); break;
1298 case E_HImode: fputs (".w", stream); break;
1299 case E_SImode: fputs (".l", stream); break;
1300 case E_SFmode: fputs (".s", stream); break;
1301 case E_DFmode: fputs (".d", stream); break;
1302 default: gcc_unreachable ();
1305 break;
1307 case 'm':
1308 gcc_assert (MEM_P (x));
1309 x = XEXP (x, 0);
1310 /* Fall through. */
1311 case 'U':
1312 switch (GET_CODE (x))
1314 case REG:
1315 case SUBREG:
1316 sh_print_operand (stream, x, 0);
1317 fputs (", 0", stream);
1318 break;
1320 case PLUS:
1321 sh_print_operand (stream, XEXP (x, 0), 0);
1322 fputs (", ", stream);
1323 sh_print_operand (stream, XEXP (x, 1), 0);
1324 break;
1326 default:
1327 gcc_unreachable ();
1329 break;
1331 case 'V':
1333 int num = exact_log2 (INTVAL (x));
1334 gcc_assert (num >= 0);
1335 fprintf (stream, "#%d", num);
1337 break;
1339 case 'W':
1341 int num = exact_log2 (~INTVAL (x));
1342 gcc_assert (num >= 0);
1343 fprintf (stream, "#%d", num);
1345 break;
1347 case 'd':
1348 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1350 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1351 break;
1353 case 'N':
1354 if (x == CONST0_RTX (GET_MODE (x)))
1356 fprintf ((stream), "r63");
1357 break;
1359 goto default_output;
1360 case 'u':
1361 if (CONST_INT_P (x))
1363 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1364 break;
1366 /* Fall through. */
1368 default_output:
1369 default:
1370 regno = 0;
1371 mode = GET_MODE (x);
1373 switch (GET_CODE (x))
1375 case TRUNCATE:
1377 rtx inner = XEXP (x, 0);
1378 int offset = 0;
1379 machine_mode inner_mode;
1381 /* We might see SUBREGs with vector mode registers inside. */
1382 if (GET_CODE (inner) == SUBREG
1383 && (GET_MODE_SIZE (GET_MODE (inner))
1384 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1385 && subreg_lowpart_p (inner))
1386 inner = SUBREG_REG (inner);
1387 if (CONST_INT_P (inner))
1389 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1390 goto default_output;
1392 inner_mode = GET_MODE (inner);
1393 if (GET_CODE (inner) == SUBREG
1394 && (GET_MODE_SIZE (GET_MODE (inner))
1395 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1396 && REG_P (SUBREG_REG (inner)))
1398 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1399 GET_MODE (SUBREG_REG (inner)),
1400 SUBREG_BYTE (inner),
1401 GET_MODE (inner));
1402 inner = SUBREG_REG (inner);
1404 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1405 abort ();
1406 /* Floating point register pairs are always big endian;
1407 general purpose registers are 64 bit wide. */
1408 regno = REGNO (inner);
1409 regno = (hard_regno_nregs (regno, inner_mode)
1410 - hard_regno_nregs (regno, mode))
1411 + offset;
1412 x = inner;
1413 goto reg;
1415 case SIGN_EXTEND:
1416 x = XEXP (x, 0);
1417 goto reg;
1418 case SUBREG:
1419 gcc_assert (SUBREG_BYTE (x) == 0
1420 && REG_P (SUBREG_REG (x)));
1422 x = SUBREG_REG (x);
1423 /* Fall through. */
1425 reg:
1426 case REG:
1427 regno += REGNO (x);
1428 if (FP_REGISTER_P (regno)
1429 && mode == V16SFmode)
1430 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1431 else if (FP_REGISTER_P (REGNO (x))
1432 && mode == V4SFmode)
1433 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1434 else if (REG_P (x)
1435 && mode == V2SFmode)
1436 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1437 else if (FP_REGISTER_P (REGNO (x))
1438 && GET_MODE_SIZE (mode) > 4)
1439 fprintf ((stream), "d%s", reg_names[regno] + 1);
1440 else
1441 fputs (reg_names[regno], (stream));
1442 break;
1444 case MEM:
1445 output_address (GET_MODE (x), XEXP (x, 0));
1446 break;
1448 default:
1449 fputc ('#', stream);
1450 output_addr_const (stream, x);
1451 break;
1453 break;
1457 static bool
1458 sh_print_operand_punct_valid_p (unsigned char code)
1460 return (code == '.' || code == '#' || code == '@' || code == ','
1461 || code == '$' || code == '\'' || code == '>');
1464 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1465 static bool
1466 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1468 if (GET_CODE (x) == UNSPEC)
1470 switch (XINT (x, 1))
1472 case UNSPEC_PIC:
1473 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1474 output_addr_const (file, XVECEXP (x, 0, 0));
1475 break;
1476 case UNSPEC_GOT:
1477 output_addr_const (file, XVECEXP (x, 0, 0));
1478 fputs ("@GOT", file);
1479 break;
1480 case UNSPEC_GOTOFF:
1481 output_addr_const (file, XVECEXP (x, 0, 0));
1482 fputs ("@GOTOFF", file);
1483 break;
1484 case UNSPEC_PLT:
1485 output_addr_const (file, XVECEXP (x, 0, 0));
1486 fputs ("@PLT", file);
1487 break;
1488 case UNSPEC_GOTPLT:
1489 output_addr_const (file, XVECEXP (x, 0, 0));
1490 fputs ("@GOTPLT", file);
1491 break;
1492 case UNSPEC_PCREL:
1493 output_addr_const (file, XVECEXP (x, 0, 0));
1494 fputs ("@PCREL", file);
1495 break;
1496 case UNSPEC_DTPOFF:
1497 output_addr_const (file, XVECEXP (x, 0, 0));
1498 fputs ("@DTPOFF", file);
1499 break;
1500 case UNSPEC_GOTTPOFF:
1501 output_addr_const (file, XVECEXP (x, 0, 0));
1502 fputs ("@GOTTPOFF", file);
1503 break;
1504 case UNSPEC_TPOFF:
1505 output_addr_const (file, XVECEXP (x, 0, 0));
1506 fputs ("@TPOFF", file);
1507 break;
1508 case UNSPEC_CALLER:
1510 char name[32];
1511 /* LPCS stands for Label for PIC Call Site. */
1512 targetm.asm_out.generate_internal_label (name, "LPCS",
1513 INTVAL (XVECEXP (x, 0, 0)));
1514 assemble_name (file, name);
1516 break;
1517 case UNSPEC_SYMOFF:
1518 output_addr_const (file, XVECEXP (x, 0, 0));
1519 fputc ('-', file);
1520 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1522 fputc ('(', file);
1523 output_addr_const (file, XVECEXP (x, 0, 1));
1524 fputc (')', file);
1526 else
1527 output_addr_const (file, XVECEXP (x, 0, 1));
1528 break;
1529 case UNSPEC_PCREL_SYMOFF:
1530 output_addr_const (file, XVECEXP (x, 0, 0));
1531 fputs ("-(", file);
1532 output_addr_const (file, XVECEXP (x, 0, 1));
1533 fputs ("-.)", file);
1534 break;
1535 case UNSPEC_GOTFUNCDESC:
1536 output_addr_const (file, XVECEXP (x, 0, 0));
1537 fputs ("@GOTFUNCDESC", file);
1538 break;
1539 case UNSPEC_GOTOFFFUNCDESC:
1540 output_addr_const (file, XVECEXP (x, 0, 0));
1541 fputs ("@GOTOFFFUNCDESC", file);
1542 break;
1543 default:
1544 return false;
1546 return true;
1548 else
1549 return false;
1552 /* Encode symbol attributes of a SYMBOL_REF into its
1553 SYMBOL_REF_FLAGS. */
1554 static void
1555 sh_encode_section_info (tree decl, rtx rtl, int first)
1557 default_encode_section_info (decl, rtl, first);
1559 if (TREE_CODE (decl) == FUNCTION_DECL
1560 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1561 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1564 /* Prepare operands for a move define_expand; specifically, one of the
1565 operands must be in a register. */
1566 void
1567 prepare_move_operands (rtx operands[], machine_mode mode)
1569 if ((mode == SImode || mode == DImode)
1570 && flag_pic
1571 && ! ((mode == Pmode || mode == ptr_mode)
1572 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1574 rtx temp;
1575 if (SYMBOLIC_CONST_P (operands[1]))
1577 if (MEM_P (operands[0]))
1578 operands[1] = force_reg (Pmode, operands[1]);
1579 else
1581 temp = (!can_create_pseudo_p ()
1582 ? operands[0]
1583 : gen_reg_rtx (Pmode));
1584 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1587 else if (GET_CODE (operands[1]) == CONST
1588 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1589 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1591 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1592 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1593 mode, temp);
1594 operands[1] = expand_binop (mode, add_optab, temp,
1595 XEXP (XEXP (operands[1], 0), 1),
1596 (!can_create_pseudo_p ()
1597 ? temp
1598 : gen_reg_rtx (Pmode)),
1599 0, OPTAB_LIB_WIDEN);
1603 if (! reload_in_progress && ! reload_completed)
1605 /* Copy the source to a register if neither operand is a register. */
1606 if (! register_operand (operands[0], mode)
1607 && ! register_operand (operands[1], mode))
1608 operands[1] = copy_to_mode_reg (mode, operands[1]);
1610 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1612 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1613 except that we can't use that function because it is static. */
1614 rtx new_rtx = change_address (operands[0], mode, 0);
1615 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1616 operands[0] = new_rtx;
1619 /* This case can happen while generating code to move the result
1620 of a library call to the target. Reject `st r0,@(rX,rY)' because
1621 reload will fail to find a spill register for rX, since r0 is already
1622 being used for the source. */
1623 else if (refers_to_regno_p (R0_REG, operands[1])
1624 && MEM_P (operands[0])
1625 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1626 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1627 operands[1] = copy_to_mode_reg (mode, operands[1]);
1629 /* When displacement addressing is used, RA will assign r0 to
1630 the pseudo register operand for the QI/HImode load/store.
1631 This tends to make a long live range for R0 and might cause
1632 anomalous register spills in some cases with LRA. See PR
1633 target/55212.
1634 We split a possible load/store into two move insns via r0 so as to
1635 shorten the R0 live range. This makes some code worse but wins
1636 on average for LRA.
1637 Also, when base+index addressing is used and the index term is
1638 a subreg, LRA assumes that more hard registers can be available
1639 in some situations. That isn't the case for SH in the problematic
1640 case. We can pre-allocate R0 for that index term to avoid
1641 the issue. See PR target/66591. */
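/* For illustration (this example is not in the original file): a
   QImode load such as

     mov.b  @(4,r5),r7

   cannot use r7 directly, because the displacement form of mov.b
   only loads into R0, so the branch below arranges it as two moves
   through R0:

     mov.b  @(4,r5),r0
     mov    r0,r7

   The concrete registers and offset are only an example.  */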
1642 else if (sh_lra_p ()
1643 && ! TARGET_SH2A
1644 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1645 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1647 bool load_p = REG_P (operands[0]);
1648 rtx reg = operands[load_p ? 0 : 1];
1649 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1651 if ((mode == QImode || mode == HImode)
1652 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1653 && GET_CODE (adr) == PLUS
1654 && REG_P (XEXP (adr, 0))
1655 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1656 && CONST_INT_P (XEXP (adr, 1))
1657 && INTVAL (XEXP (adr, 1)) != 0
1658 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1660 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1661 emit_move_insn (r0_rtx, operands[1]);
1662 operands[1] = r0_rtx;
1664 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1665 && GET_CODE (adr) == PLUS
1666 && REG_P (XEXP (adr, 0))
1667 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1668 && SUBREG_P (XEXP (adr, 1))
1669 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1671 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1672 emit_move_insn (r0_rtx, XEXP (adr, 1));
1673 XEXP (adr, 1) = r0_rtx;
1678 if (mode == Pmode || mode == ptr_mode)
1680 rtx op0 = operands[0];
1681 rtx op1 = operands[1];
1682 rtx opc;
1683 if (GET_CODE (op1) == CONST
1684 && GET_CODE (XEXP (op1, 0)) == PLUS
1685 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1686 != TLS_MODEL_NONE))
1688 opc = XEXP (XEXP (op1, 0), 1);
1689 op1 = XEXP (XEXP (op1, 0), 0);
1691 else
1692 opc = NULL_RTX;
1694 enum tls_model tls_kind;
1696 if (! reload_in_progress && ! reload_completed
1697 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1699 rtx tga_op1, tga_ret, tmp, tmp2;
1701 if (! flag_pic
1702 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1703 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1704 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1706 static int got_labelno;
1707 /* Don't schedule insns for getting GOT address when
1708 the first scheduling is enabled, to avoid spill
1709 failures for R0. */
1710 if (flag_schedule_insns)
1711 emit_insn (gen_blockage ());
1712 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1713 emit_use (gen_rtx_REG (SImode, PIC_REG));
1714 if (flag_schedule_insns)
1715 emit_insn (gen_blockage ());
1718 switch (tls_kind)
1720 case TLS_MODEL_GLOBAL_DYNAMIC:
1721 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1722 if (TARGET_FDPIC)
1723 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1724 sh_get_fdpic_reg_initial_val ());
1725 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1726 tmp = gen_reg_rtx (Pmode);
1727 emit_move_insn (tmp, tga_ret);
1728 op1 = tmp;
1729 break;
1731 case TLS_MODEL_LOCAL_DYNAMIC:
1732 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1733 if (TARGET_FDPIC)
1734 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1735 sh_get_fdpic_reg_initial_val ());
1736 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1738 tmp = gen_reg_rtx (Pmode);
1739 emit_move_insn (tmp, tga_ret);
1741 if (register_operand (op0, Pmode))
1742 tmp2 = op0;
1743 else
1744 tmp2 = gen_reg_rtx (Pmode);
1746 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1747 op1 = tmp2;
1748 break;
1750 case TLS_MODEL_INITIAL_EXEC:
1751 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1752 tmp = gen_sym2GOTTPOFF (op1);
1753 if (TARGET_FDPIC)
1754 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1755 sh_get_fdpic_reg_initial_val ());
1756 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1757 op1 = tga_op1;
1758 break;
1760 case TLS_MODEL_LOCAL_EXEC:
1761 tmp2 = gen_reg_rtx (Pmode);
1762 emit_insn (gen_store_gbr (tmp2));
1763 tmp = gen_reg_rtx (Pmode);
1764 emit_insn (gen_symTPOFF2reg (tmp, op1));
1766 if (register_operand (op0, Pmode))
1767 op1 = op0;
1768 else
1769 op1 = gen_reg_rtx (Pmode);
1771 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1772 break;
1774 default:
1775 gcc_unreachable ();
1777 if (opc)
1778 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1779 operands[1] = op1;
1783 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1785 rtx base, offset;
1786 split_const (operands[1], &base, &offset);
1788 if (GET_CODE (base) == SYMBOL_REF
1789 && !offset_within_block_p (base, INTVAL (offset)))
1791 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1792 emit_move_insn (tmp, base);
1793 if (!arith_operand (offset, mode))
1794 offset = force_reg (mode, offset);
1795 emit_insn (gen_add3_insn (operands[0], tmp, offset));
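/* Illustrative sketch (not emitted verbatim): for a local-exec TLS access
   the expansion above boils down to reading the thread pointer from GBR
   and adding the variable's TPOFF, roughly

     stc    gbr,rX          ! thread pointer
     mov.l  .Ln,rY          ! .Ln: .long  var@TPOFF
     add    rY,rX           ! rX = &var

   where the register numbers are hypothetical and the actual insns and
   constant pool placement are decided later by the move patterns and the
   constant pool machinery.  */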
1800 /* Implement the canonicalize_comparison target hook for the combine
1801 pass. For the target hook this function is invoked via
1802 sh_canonicalize_comparison. This function is also re-used to
1803 canonicalize comparisons in cbranch pattern expanders. */
1804 static void
1805 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1806 machine_mode mode,
1807 bool op0_preserve_value)
1809 /* When invoked from within the combine pass the mode is not specified,
1810 so try to get it from one of the operands. */
1811 if (mode == VOIDmode)
1812 mode = GET_MODE (op0);
1813 if (mode == VOIDmode)
1814 mode = GET_MODE (op1);
1816 // We need to have a mode to do something useful here.
1817 if (mode == VOIDmode)
1818 return;
1820 // Currently, we don't deal with floats here.
1821 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1822 return;
1824 // Make sure that the constant operand is the second operand.
1825 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1827 if (op0_preserve_value)
1828 return;
1830 std::swap (op0, op1);
1831 cmp = swap_condition (cmp);
1834 if (CONST_INT_P (op1))
1836 /* Try to adjust the constant operand in such a way that available
1837 comparison insns can be utilized better and the constant can be
1838 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1839 constant pool. */
1840 const HOST_WIDE_INT val = INTVAL (op1);
1842 /* x > -1 --> x >= 0
1843 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1844 x <= -1 --> x < 0
1845 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1846 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1848 cmp = cmp == GT ? GE : LT;
1849 op1 = gen_int_mode (val + 1, mode);
1852 /* x >= 1 --> x > 0
1853 x >= 0x80 --> x > 0x7F
1854 x < 1 --> x <= 0
1855 x < 0x80 --> x <= 0x7F */
1856 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1858 cmp = cmp == GE ? GT : LE;
1859 op1 = gen_int_mode (val - 1, mode);
1862 /* unsigned x >= 1 --> x != 0
1863 unsigned x < 1 --> x == 0 */
1864 else if (val == 1 && (cmp == GEU || cmp == LTU))
1866 cmp = cmp == GEU ? NE : EQ;
1867 op1 = CONST0_RTX (mode);
1870 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1871 unsigned x < 0x80 --> unsigned x < 0x7F */
1872 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1874 cmp = cmp == GEU ? GTU : LEU;
1875 op1 = gen_int_mode (val - 1, mode);
1878 /* unsigned x > 0 --> x != 0
1879 unsigned x <= 0 --> x == 0 */
1880 else if (val == 0 && (cmp == GTU || cmp == LEU))
1881 cmp = cmp == GTU ? NE : EQ;
1883 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1884 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1885 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1886 && val == 0x7FFFFFFF)
1888 cmp = cmp == GTU ? LT : GE;
1889 op1 = const0_rtx;
1892 /* unsigned x >= 0x80000000 --> signed x < 0
1893 unsigned x < 0x80000000 --> signed x >= 0 */
1894 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1895 && (unsigned HOST_WIDE_INT)val
1896 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1898 cmp = cmp == GEU ? LT : GE;
1899 op1 = const0_rtx;
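/* Worked examples of the canonicalizations above (SImode, constant in
   op1): `x < 0x80' becomes `x <= 0x7F' and `x >= 1' becomes `x > 0', so
   the constant fits into an 8 bit immediate and can be loaded with a
   'mov #imm,Rm' insn instead of a constant pool load; `unsigned
   x >= 0x80000000' turns into the sign test `signed x < 0'.  These are
   just restatements of the transformations listed above.  */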
1904 /* This function implements the canonicalize_comparison target hook.
1905 This wrapper around the internally used sh_canonicalize_comparison
1906 function is needed to do the enum rtx_code <-> int conversion.
1907 Target hooks cannot use enum rtx_code in its definition. */
1908 static void
1909 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1910 bool op0_preserve_value)
1912 enum rtx_code tmp_code = (enum rtx_code)*code;
1913 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1914 VOIDmode, op0_preserve_value);
1915 *code = (int)tmp_code;
1918 /* This function implements the legitimate_combined_insn target hook,
1919 which the combine pass uses to early reject combined insns, before
1920 it tries to recog the insn and determine its cost. */
1921 static bool
1922 sh_legitimate_combined_insn (rtx_insn* insn)
1924 /* Reject combinations of memory loads and zero extensions, as these
1925 interfere with other combine patterns such as zero extracts and bit
1926 tests. The SH2A movu.{b|w} insns are formed later in the
1927 'sh_optimize_extu_exts' pass after combine/split1. */
1928 rtx p = PATTERN (insn);
1929 if (GET_CODE (p) == SET
1930 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1931 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1932 && MEM_P (XEXP (XEXP (p, 1), 0)))
1933 return false;
1935 return true;
1938 bool
1939 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1941 *p1 = T_REG;
1942 *p2 = INVALID_REGNUM;
1943 return true;
1946 /* Try to calculate the branch distance of a conditional branch in bytes.
1948 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1949 walk from this insn into the next (fall-through) basic block and see if
1950 we hit the label. */
1951 unsigned int
1952 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1954 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1956 if (dump_file)
1958 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1959 print_rtl_single (dump_file, cbranch_insn);
1962 unsigned int dist = 0;
1964 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1965 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1967 const unsigned int i_len = get_attr_length (i);
1968 dist += i_len;
1970 if (dump_file)
1971 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1972 INSN_UID (i), i_len, dist);
1974 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1976 if (l == cbranch_insn->jump_target ())
1978 if (dump_file)
1979 fprintf (dump_file, " cbranch dist = %u\n", dist);
1980 return dist;
1982 break;
1986 if (dump_file)
1987 fprintf (dump_file, " cbranch dist = unknown\n");
1989 return unknown_cbranch_distance;
1992 enum rtx_code
1993 prepare_cbranch_operands (rtx *operands, machine_mode mode,
1994 enum rtx_code comparison)
1996 gcc_assert (can_create_pseudo_p ());
1998 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1999 comparison = GET_CODE (operands[0]);
2001 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2002 mode, false);
2004 rtx op1 = operands[1];
2005 operands[1] = force_reg (mode, op1);
2007 /* When we are handling DImode comparisons, we want to keep constants so
2008 that we can optimize the component comparisons; however, memory loads
2009 are better issued as a whole so that they can be scheduled well.
2010 SImode equality comparisons allow I08 constants, but only when they
2011 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2012 into a register, that register might as well be r0, and we allow the
2013 constant. If it is already in a register, this is likely to be
2014 allocated to a different hard register, thus we load the constant into
2015 a register unless it is zero. */
2016 if (!REG_P (operands[2])
2017 && (!CONST_INT_P (operands[2])
2018 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2019 && ((comparison != EQ && comparison != NE)
2020 || (REG_P (op1) && REGNO (op1) != R0_REG)
2021 || !satisfies_constraint_I08 (operands[2])))))
2022 operands[2] = force_reg (mode, operands[2]);
2024 return comparison;
2027 static void
2028 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2029 profile_probability probability)
2031 rtx (*branch_expander) (rtx) = gen_branch_true;
2032 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2033 switch (comparison)
2035 case NE: case LT: case LE: case LTU: case LEU:
2036 comparison = reverse_condition (comparison);
2037 branch_expander = gen_branch_false;
2038 default: ;
2040 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2041 gen_rtx_fmt_ee (comparison, SImode,
2042 operands[1], operands[2])));
2043 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2044 if (probability.initialized_p ())
2045 add_reg_br_prob_note (jump, probability);
2048 void
2049 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2051 expand_cbranchsi4 (operands, comparison,
2052 profile_probability::uninitialized ());
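/* For instance, a branch on `a < b' is emitted by the expander above as
   the available `>=' T bit comparison followed by a branch-false,
   roughly (register numbers are only illustrative):

     cmp/ge  rB,rA     ! T = (a >= b)
     bf      .Ltarget  ! branch when T is clear, i.e. a < b

   since LT is in the set of comparisons that get reversed and paired
   with gen_branch_false.  */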
2055 /* ??? How should we distribute probabilities when more than one branch
2056 is generated? So far we only have some ad-hoc observations:
2057 - If the operands are random, they are likely to differ in both parts.
2058 - If comparing items in a hash chain, the operands are random or equal;
2059 operation should be EQ or NE.
2060 - If items are searched in an ordered tree from the root, we can expect
2061 the highpart to be unequal about half of the time; operation should be
2062 an inequality comparison, operands non-constant, and overall probability
2063 about 50%. Likewise for quicksort.
2064 - Range checks will often be made against constants. Even if we assume for
2065 simplicity an even distribution of the non-constant operand over a
2066 sub-range here, the same probability could be generated with differently
2067 wide sub-ranges - as long as the ratio of the part of the subrange that
2068 is before the threshold to the part that comes after the threshold stays
2069 the same. Thus, we can't really tell anything here;
2070 assuming random distribution is at least simple.
2072 bool
2073 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2075 enum rtx_code msw_taken, msw_skip, lsw_taken;
2076 rtx_code_label *skip_label = NULL;
2077 rtx op1h, op1l, op2h, op2l;
2078 int num_branches;
2079 profile_probability prob, rev_prob;
2080 profile_probability msw_taken_prob = profile_probability::uninitialized (),
2081 msw_skip_prob = profile_probability::uninitialized (),
2082 lsw_taken_prob = profile_probability::uninitialized ();
2084 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2085 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2086 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2087 op1l = gen_lowpart (SImode, operands[1]);
2088 op2l = gen_lowpart (SImode, operands[2]);
2089 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2090 prob = split_branch_probability;
2091 rev_prob = prob.invert ();
2092 switch (comparison)
2094 case EQ:
2095 msw_skip = NE;
2096 lsw_taken = EQ;
2097 if (prob.initialized_p ())
2099 /* FIXME: This is not optimal. We do not really know the probability
2100 that values differ by MSW only, but we should probably distribute
2101 probabilities more evenly. */
2102 msw_skip_prob = rev_prob;
2103 lsw_taken_prob = prob > profile_probability::never ()
2104 ? profile_probability::guessed_always ()
2105 : profile_probability::guessed_never ();
2107 break;
2108 case NE:
2109 msw_taken = NE;
2110 msw_taken_prob = prob;
2111 lsw_taken = NE;
2112 lsw_taken_prob = profile_probability::guessed_never ();
2113 break;
2114 case GTU: case GT:
2115 msw_taken = comparison;
2116 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2117 break;
2118 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2119 msw_skip = swap_condition (msw_taken);
2120 lsw_taken = GTU;
2121 break;
2122 case GEU: case GE:
2123 if (op2l == CONST0_RTX (SImode))
2124 msw_taken = comparison;
2125 else
2127 msw_taken = comparison == GE ? GT : GTU;
2128 msw_skip = swap_condition (msw_taken);
2129 lsw_taken = GEU;
2131 break;
2132 case LTU: case LT:
2133 msw_taken = comparison;
2134 if (op2l == CONST0_RTX (SImode))
2135 break;
2136 msw_skip = swap_condition (msw_taken);
2137 lsw_taken = LTU;
2138 break;
2139 case LEU: case LE:
2140 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2141 msw_taken = comparison;
2142 else
2144 lsw_taken = LEU;
2145 if (comparison == LE)
2146 msw_taken = LT;
2147 else if (op2h != CONST0_RTX (SImode))
2148 msw_taken = LTU;
2149 else
2151 msw_skip = swap_condition (LTU);
2152 break;
2154 msw_skip = swap_condition (msw_taken);
2156 break;
2157 default: return false;
2159 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2160 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2161 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2162 if (comparison != EQ && comparison != NE && num_branches > 1)
2164 if (!CONSTANT_P (operands[2])
2165 && prob.initialized_p ()
2166 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2167 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2169 msw_taken_prob = prob.apply_scale (1, 2);
2170 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
2171 rev_prob.to_reg_br_prob_base ()
2172 + REG_BR_PROB_BASE);
2173 lsw_taken_prob = prob;
2175 else
2177 msw_taken_prob = prob;
2178 msw_skip_prob = profile_probability::guessed_always ();
2179 /* ??? If we have a constant op2h, should we use that when
2180 calculating lsw_taken_prob? */
2181 lsw_taken_prob = prob;
2184 operands[1] = op1h;
2185 operands[2] = op2h;
2187 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2188 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2189 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2191 rtx taken_label = operands[3];
2193 /* Operands were possibly modified, but msw_skip doesn't expect this.
2194 Always use the original ones. */
2195 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2197 operands[1] = op1h;
2198 operands[2] = op2h;
2201 operands[3] = skip_label = gen_label_rtx ();
2202 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2203 operands[3] = taken_label;
2205 operands[1] = op1l;
2206 operands[2] = op2l;
2207 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2208 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2209 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2210 emit_label (skip_label);
2211 return true;
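/* Worked example: a DImode `a == b' branch is split by the code above
   into a high word comparison that skips everything if the MSWs differ
   (msw_skip = NE), followed by a low word comparison that branches to
   the target if the LSWs are equal (lsw_taken = EQ); the skip label is
   emitted after the second branch.  */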
2214 /* Given an operand, return 1 if the evaluated operand plugged into an
2215 if_then_else will result in a branch_true, 0 if branch_false, or
2216 -1 if neither applies. The truth table goes like this:
2218 op | cmpval | code | result
2219 ---------+--------+---------+--------------------
2220 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2221 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2222 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2223 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2224 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2225 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2226 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2227 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2229 sh_eval_treg_value (rtx op)
2231 if (t_reg_operand (op, GET_MODE (op)))
2232 return 1;
2233 if (negt_reg_operand (op, GET_MODE (op)))
2234 return 0;
2236 rtx_code code = GET_CODE (op);
2237 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2238 return -1;
2240 int cmpop = code == EQ ? 1 : 0;
2241 int cmpval = INTVAL (XEXP (op, 1));
2242 if (cmpval != 0 && cmpval != 1)
2243 return -1;
2245 int t;
2246 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2247 t = 0;
2248 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2249 t = 1;
2250 else
2251 return -1;
2253 return t ^ (cmpval == cmpop);
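/* Example: for OP = (eq (reg T) (const_int 0)) the code above yields
   cmpop = 1, cmpval = 0 and t = 0, so the result is 0 ^ (0 == 1) = 0,
   matching the first row of the truth table (branch_false).  */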
2256 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2257 of floating-point comparisons. */
2258 static void
2259 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2261 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2262 && GET_CODE (insn) != PARALLEL)
2264 insn = gen_rtx_PARALLEL (VOIDmode,
2265 gen_rtvec (3, insn,
2266 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2267 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2269 emit_insn (insn);
2272 /* Prepare the operands for an scc instruction; make sure that the
2273 compare has been done and the result is in T_REG. */
2274 void
2275 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2277 rtx t_reg = get_t_reg_rtx ();
2278 enum rtx_code oldcode = code;
2280 /* First need a compare insn. */
2281 switch (code)
2283 case NE:
2284 /* It isn't possible to handle this case. */
2285 gcc_unreachable ();
2286 case LT:
2287 code = GT;
2288 break;
2289 case LE:
2290 code = GE;
2291 break;
2292 case LTU:
2293 code = GTU;
2294 break;
2295 case LEU:
2296 code = GEU;
2297 break;
2298 default:
2299 break;
2301 if (code != oldcode)
2302 std::swap (op0, op1);
2304 machine_mode mode = GET_MODE (op0);
2305 if (mode == VOIDmode)
2306 mode = GET_MODE (op1);
2308 op0 = force_reg (mode, op0);
2309 if ((code != EQ && code != NE
2310 && (op1 != const0_rtx
2311 || code == GTU || code == GEU || code == LTU || code == LEU))
2312 || (mode == DImode && op1 != const0_rtx)
2313 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2314 op1 = force_reg (mode, op1);
2316 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2317 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2318 mode);
2321 /* Called from the md file, set up the operands of a compare instruction. */
2322 void
2323 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2325 enum rtx_code code = GET_CODE (operands[0]);
2326 enum rtx_code branch_code;
2327 rtx op0 = operands[1];
2328 rtx op1 = operands[2];
2329 rtx insn;
2330 bool need_ccmpeq = false;
2332 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2334 op0 = force_reg (mode, op0);
2335 op1 = force_reg (mode, op1);
2337 else
2339 if (code != EQ || mode == DImode)
2341 /* Force args into regs, since we can't use constants here. */
2342 op0 = force_reg (mode, op0);
2343 if (op1 != const0_rtx || code == GTU || code == GEU)
2344 op1 = force_reg (mode, op1);
2348 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2350 if (code == LT
2351 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2352 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2354 std::swap (op0, op1);
2355 code = swap_condition (code);
2358 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2359 if (code == GE)
2361 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2362 need_ccmpeq = true;
2363 code = GT;
2366 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2367 to EQ/GT respectively. */
2368 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2371 switch (code)
2373 case EQ:
2374 case GT:
2375 case GE:
2376 case GTU:
2377 case GEU:
2378 branch_code = code;
2379 break;
2380 case NE:
2381 case LT:
2382 case LE:
2383 case LTU:
2384 case LEU:
2385 branch_code = reverse_condition (code);
2386 break;
2387 default:
2388 gcc_unreachable ();
2391 insn = gen_rtx_SET (get_t_reg_rtx (),
2392 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2394 sh_emit_set_t_insn (insn, mode);
2395 if (need_ccmpeq)
2396 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2398 if (branch_code == code)
2399 emit_jump_insn (gen_branch_true (operands[3]));
2400 else
2401 emit_jump_insn (gen_branch_false (operands[3]));
2404 void
2405 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2407 enum rtx_code code = GET_CODE (operands[1]);
2408 rtx op0 = operands[2];
2409 rtx op1 = operands[3];
2410 rtx_code_label *lab = NULL;
2411 bool invert = false;
2413 op0 = force_reg (mode, op0);
2414 if ((code != EQ && code != NE
2415 && (op1 != const0_rtx
2416 || code == GTU || code == GEU || code == LTU || code == LEU))
2417 || (mode == DImode && op1 != const0_rtx)
2418 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2419 op1 = force_reg (mode, op1);
2421 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2423 if (code == LT || code == LE)
2425 std::swap (op0, op1);
2426 code = swap_condition (code);
2428 if (code == GE)
2430 if (TARGET_IEEE)
2432 lab = gen_label_rtx ();
2433 sh_emit_scc_to_t (EQ, op0, op1);
2434 emit_jump_insn (gen_branch_true (lab));
2435 code = GT;
2437 else
2439 code = LT;
2440 invert = true;
2445 if (code == NE)
2447 code = EQ;
2448 invert = true;
2451 sh_emit_scc_to_t (code, op0, op1);
2452 if (lab)
2453 emit_label (lab);
2454 if (invert)
2455 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2456 else
2457 emit_move_insn (operands[0], get_t_reg_rtx ());
2460 /* Functions to output assembly code. */
2462 /* Return a sequence of instructions to perform DI or DF move.
2464 Since the SH cannot move a DI or DF in one instruction, we have
2465 to take care when we see overlapping source and dest registers. */
2466 const char *
2467 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2468 machine_mode mode)
2470 rtx dst = operands[0];
2471 rtx src = operands[1];
2473 if (MEM_P (dst)
2474 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2475 return "mov.l %T1,%0" "\n"
2476 " mov.l %1,%0";
2478 if (register_operand (dst, mode)
2479 && register_operand (src, mode))
2481 if (REGNO (src) == MACH_REG)
2482 return "sts mach,%S0" "\n"
2483 " sts macl,%R0";
2485 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2486 when mov.d r1,r0 do r1->r0 then r2->r1. */
2487 if (REGNO (src) + 1 == REGNO (dst))
2488 return "mov %T1,%T0" "\n"
2489 " mov %1,%0";
2490 else
2491 return "mov %1,%0" "\n"
2492 " mov %T1,%T0";
2494 else if (CONST_INT_P (src))
2496 if (INTVAL (src) < 0)
2497 output_asm_insn ("mov #-1,%S0", operands);
2498 else
2499 output_asm_insn ("mov #0,%S0", operands);
2501 return "mov %1,%R0";
2503 else if (MEM_P (src))
2505 int ptrreg = -1;
2506 int dreg = REGNO (dst);
2507 rtx inside = XEXP (src, 0);
2509 switch (GET_CODE (inside))
2511 case REG:
2512 ptrreg = REGNO (inside);
2513 break;
2515 case SUBREG:
2516 ptrreg = subreg_regno (inside);
2517 break;
2519 case PLUS:
2520 ptrreg = REGNO (XEXP (inside, 0));
2521 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2522 an offsettable address. Unfortunately, offsettable addresses use
2523 QImode to check the offset, and a QImode offsettable address
2524 requires r0 for the other operand, which is not currently
2525 supported, so we can't use the 'o' constraint.
2526 Thus we must check for and handle r0+REG addresses here.
2527 We punt for now, since this is likely very rare. */
2528 gcc_assert (!REG_P (XEXP (inside, 1)));
2529 break;
2531 case LABEL_REF:
2532 return "mov.l %1,%0" "\n"
2533 " mov.l %1+4,%T0";
2534 case POST_INC:
2535 return "mov.l %1,%0" "\n"
2536 " mov.l %1,%T0";
2537 default:
2538 gcc_unreachable ();
2541 /* Work out the safe way to copy. Copy into the second half first. */
2542 if (dreg == ptrreg)
2543 return "mov.l %T1,%T0" "\n"
2544 " mov.l %1,%0";
2547 return "mov.l %1,%0" "\n"
2548 " mov.l %T1,%T0";
2551 /* Print an instruction which would have gone into a delay slot after
2552 another instruction, but couldn't because the other instruction expanded
2553 into a sequence where putting the slot insn at the end wouldn't work. */
2554 static void
2555 print_slot (rtx_sequence *seq)
2557 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2559 seq->insn (1)->set_deleted ();
2562 const char *
2563 output_far_jump (rtx_insn *insn, rtx op)
2565 struct { rtx lab, reg, op; } this_jmp;
2566 rtx_code_label *braf_base_lab = NULL;
2567 const char *jump;
2568 int far;
2569 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2570 rtx_insn *prev;
2572 this_jmp.lab = gen_label_rtx ();
2574 if (TARGET_SH2
2575 && offset >= -32764
2576 && offset - get_attr_length (insn) <= 32766
2577 && ! CROSSING_JUMP_P (insn))
2579 far = 0;
2580 jump = "mov.w %O0,%1" "\n"
2581 " braf %1";
2583 else
2585 far = 1;
2586 if (flag_pic)
2588 if (TARGET_SH2)
2589 jump = "mov.l %O0,%1" "\n"
2590 " braf %1";
2591 else
2592 jump = "mov.l r0,@-r15" "\n"
2593 " mova %O0,r0" "\n"
2594 " mov.l @r0,%1" "\n"
2595 " add r0,%1" "\n"
2596 " mov.l @r15+,r0" "\n"
2597 " jmp @%1";
2599 else
2600 jump = "mov.l %O0,%1" "\n"
2601 " jmp @%1";
2603 /* If we have a scratch register available, use it. */
2604 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2605 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2607 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2608 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2609 jump = "mov.l r1,@-r15" "\n"
2610 " mova %O0,r0" "\n"
2611 " mov.l @r0,r1" "\n"
2612 " add r1,r0" "\n"
2613 " mov.l @r15+,r1" "\n"
2614 " jmp @%1";
2615 output_asm_insn (jump, &this_jmp.lab);
2616 if (dbr_sequence_length ())
2617 print_slot (final_sequence);
2618 else
2619 output_asm_insn ("nop", 0);
2621 else
2623 /* Output the delay slot insn first if any. */
2624 if (dbr_sequence_length ())
2625 print_slot (final_sequence);
2627 this_jmp.reg = gen_rtx_REG (SImode, 13);
2628 output_asm_insn ("mov.l r13,@-r15", 0);
2629 output_asm_insn (jump, &this_jmp.lab);
2630 output_asm_insn ("mov.l @r15+,r13", 0);
2632 if (far && flag_pic && TARGET_SH2)
2634 braf_base_lab = gen_label_rtx ();
2635 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2636 CODE_LABEL_NUMBER (braf_base_lab));
2638 if (far)
2639 output_asm_insn (".align 2", 0);
2640 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2641 this_jmp.op = op;
2642 if (far && flag_pic)
2644 if (TARGET_SH2)
2645 this_jmp.lab = braf_base_lab;
2646 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2648 else
2649 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2650 return "";
2653 /* Local label counter, used for constants in the pool and inside
2654 pattern branches. */
2655 static int lf = 100;
2657 /* Output code for ordinary branches. */
2658 const char *
2659 output_branch (int logic, rtx_insn *insn, rtx *operands)
2661 switch (get_attr_length (insn))
2663 case 6:
2664 /* This can happen if filling the delay slot has caused a forward
2665 branch to exceed its range (we could reverse it, but only
2666 when we know we won't overextend other branches; this should
2667 best be handled by relaxation).
2669 It can also happen when other condbranches hoist delay slot insns
2669 from their destination, thus leading to code size increase.
2670 But the branch will still be in the range -4092..+4098 bytes. */
2671 if (! TARGET_RELAX)
2673 int label = lf++;
2674 /* The call to print_slot will clobber the operands. */
2675 rtx op0 = operands[0];
2677 /* If the instruction in the delay slot is annulled (true), then
2678 there is no delay slot where we can put it now. The only safe
2679 place for it is after the label. final will do that by default. */
2681 if (final_sequence
2682 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2683 && get_attr_length (final_sequence->insn (1)))
2685 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2686 ASSEMBLER_DIALECT ? "/" : ".", label);
2687 print_slot (final_sequence);
2689 else
2690 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2692 output_asm_insn ("bra\t%l0", &op0);
2693 fprintf (asm_out_file, "\tnop\n");
2694 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2696 return "";
2698 /* FALLTHRU */
2699 /* When relaxing, handle this like a short branch. The linker
2700 will fix it up if it still doesn't fit after relaxation. */
2701 case 2:
2702 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2704 /* These are for SH2e, in which we have to account for the
2705 extra nop because of the hardware bug in annulled branches. */
2706 case 8:
2707 if (! TARGET_RELAX)
2709 int label = lf++;
2711 gcc_assert (!final_sequence
2712 || !(INSN_ANNULLED_BRANCH_P
2713 (XVECEXP (final_sequence, 0, 0))));
2714 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2715 logic ? "f" : "t",
2716 ASSEMBLER_DIALECT ? "/" : ".", label);
2717 fprintf (asm_out_file, "\tnop\n");
2718 output_asm_insn ("bra\t%l0", operands);
2719 fprintf (asm_out_file, "\tnop\n");
2720 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2722 return "";
2724 /* FALLTHRU */
2725 case 4:
2727 char buffer[10];
2729 sprintf (buffer, "b%s%ss\t%%l0",
2730 logic ? "t" : "f",
2731 ASSEMBLER_DIALECT ? "/" : ".");
2732 output_asm_insn (buffer, &operands[0]);
2733 return "nop";
2736 default:
2737 /* There should be no longer branches now - that would
2738 indicate that something has destroyed the branches set
2739 up in machine_dependent_reorg. */
2740 gcc_unreachable ();
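/* Example for the length 6 case above: an out-of-range `bt .Ltarget'
   is replaced by an inverted short branch around an unconditional one,
   roughly

     bf/s   .LF100       ! reversed condition, delay slot filled
      <delay slot insn>
     bra    .Ltarget
      nop
   .LF100:

   where the label number comes from the local counter lf and the /s
   form is only used when a delay slot insn is actually available.  */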
2744 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2745 fill in operand 9 as a label to the successor insn.
2746 We try to use jump threading where possible.
2748 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2748 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2749 follow jmp and bt, if the address is in range. */
2750 const char *
2751 output_branchy_insn (enum rtx_code code, const char *templ,
2752 rtx_insn *insn, rtx *operands)
2754 rtx_insn *next_insn = NEXT_INSN (insn);
2756 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2758 rtx src = SET_SRC (PATTERN (next_insn));
2759 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2761 /* Following branch not taken */
2762 rtx_code_label *lab = gen_label_rtx ();
2763 emit_label_after (lab, next_insn);
2764 INSN_ADDRESSES_NEW (lab,
2765 INSN_ADDRESSES (INSN_UID (next_insn))
2766 + get_attr_length (next_insn));
2767 operands[9] = lab;
2768 return templ;
2770 else
2772 int offset = (branch_dest (next_insn)
2773 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2774 if (offset >= -252 && offset <= 258)
2776 if (GET_CODE (src) == IF_THEN_ELSE)
2777 /* branch_true */
2778 src = XEXP (src, 1);
2779 operands[9] = src;
2780 return templ;
2784 rtx_code_label *lab = gen_label_rtx ();
2785 emit_label_after (lab, insn);
2786 INSN_ADDRESSES_NEW (lab,
2787 INSN_ADDRESSES (INSN_UID (insn))
2788 + get_attr_length (insn));
2789 operands[9] = lab;
2790 return templ;
2793 const char *
2794 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2796 return output_branchy_insn (NE, "bt %l9" "\n"
2797 " fcmp/eq %1,%0",
2798 insn, operands);
2801 /* Output the start of the assembler file. */
2802 static void
2803 sh_file_start (void)
2805 default_file_start ();
2807 if (TARGET_ELF)
2808 /* We need to show the text section with the proper
2809 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2810 emits it without attributes; otherwise GAS
2811 will complain. We can teach GAS specifically about the
2812 default attributes for our choice of text section, but
2813 then we would have to change GAS again if/when we change
2814 the text section name. */
2815 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2816 else
2817 /* Switch to the data section so that the coffsem symbol
2818 isn't in the text section. */
2819 switch_to_section (data_section);
2821 if (TARGET_LITTLE_ENDIAN)
2822 fputs ("\t.little\n", asm_out_file);
2825 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2826 need to be output as pointers to function descriptors for
2827 FDPIC. */
2829 static bool
2830 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2832 if (TARGET_FDPIC && size == UNITS_PER_WORD
2833 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2835 fputs ("\t.long\t", asm_out_file);
2836 output_addr_const (asm_out_file, value);
2837 fputs ("@FUNCDESC\n", asm_out_file);
2838 return true;
2840 return default_assemble_integer (value, size, aligned_p);
2843 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2844 static bool
2845 unspec_caller_rtx_p (rtx pat)
2847 rtx base, offset;
2848 split_const (pat, &base, &offset);
2850 if (GET_CODE (base) == UNSPEC)
2852 if (XINT (base, 1) == UNSPEC_CALLER)
2853 return true;
2854 for (int i = 0; i < XVECLEN (base, 0); i++)
2855 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2856 return true;
2858 return false;
2861 /* Indicate that INSN cannot be duplicated. This is true for an insn
2862 that generates a unique label. */
2863 static bool
2864 sh_cannot_copy_insn_p (rtx_insn *insn)
2866 if (!reload_completed || !flag_pic)
2867 return false;
2869 if (!NONJUMP_INSN_P (insn))
2870 return false;
2871 if (asm_noperands (insn) >= 0)
2872 return false;
2874 rtx pat = PATTERN (insn);
2876 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2877 return false;
2879 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2881 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2882 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2883 return true;
2886 if (GET_CODE (pat) != SET)
2887 return false;
2888 pat = SET_SRC (pat);
2890 if (unspec_caller_rtx_p (pat))
2891 return true;
2893 return false;
2896 /* Number of instructions used to make an arithmetic right shift by N. */
2897 static const char ashiftrt_insns[] =
2898 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2900 /* Description of a logical left or right shift, when expanded to a sequence
2901 of 1/2/8/16 shifts.
2902 Notice that one bit right shifts clobber the T bit. One bit left shifts
2903 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2904 enum
2906 ASHL_CLOBBERS_T = 1 << 0,
2907 LSHR_CLOBBERS_T = 1 << 1
2910 struct ashl_lshr_sequence
2912 char insn_count;
2913 signed char amount[6];
2914 char clobbers_t;
2917 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2919 { 0, { 0 }, 0 }, // 0
2920 { 1, { 1 }, LSHR_CLOBBERS_T },
2921 { 1, { 2 }, 0 },
2922 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2923 { 2, { 2, 2 }, 0 }, // 4
2924 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2925 { 3, { 2, 2, 2 }, 0 },
2926 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2927 { 1, { 8 }, 0 }, // 8
2928 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2929 { 2, { 8, 2 }, 0 },
2930 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2931 { 3, { 8, 2, 2 }, 0 }, // 12
2932 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2933 { 3, { 8, -2, 8 }, 0 },
2934 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2935 { 1, { 16 }, 0 }, // 16
2936 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2937 { 2, { 16, 2 }, 0 },
2938 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2939 { 3, { 16, 2, 2 }, 0 }, // 20
2940 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2941 { 3, { 16, -2, 8 }, 0 },
2942 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2943 { 2, { 16, 8 }, 0 }, // 24
2944 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2945 { 3, { 16, 8, 2 }, 0 },
2946 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2947 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2948 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2949 { 3, { 16, -2, 16 }, 0 },
2951 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2952 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2953 However, the shift-and combiner code needs this entry here to be in
2954 terms of real shift insns. */
2955 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2958 /* Individual shift sequences for shift amounts < 16, where up to the three
2959 highmost bits might be clobbered. This is typically used when combined with some
2960 kind of sign or zero extension. */
2961 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2963 { 0, { 0 }, 0 }, // 0
2964 { 1, { 1 }, LSHR_CLOBBERS_T },
2965 { 1, { 2 }, 0 },
2966 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2967 { 2, { 2, 2 }, 0 }, // 4
2968 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2969 { 2, { 8, -2 }, 0 },
2970 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2971 { 1, { 8 }, 0 }, // 8
2972 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2973 { 2, { 8, 2 }, 0 },
2974 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2975 { 3, { 8, 2, 2 }, 0 }, // 12
2976 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2977 { 2, { 16, -2 }, 0 },
2978 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2979 { 1, { 16 }, 0 }, // 16
2980 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2981 { 2, { 16, 2 }, 0 },
2982 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2983 { 3, { 16, 2, 2 }, 0 }, // 20
2984 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2985 { 3, { 16, -2, 8 }, 0 },
2986 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2987 { 2, { 16, 8 }, 0 }, // 24
2988 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2989 { 3, { 16, 8, 2 }, 0 },
2990 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2991 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2992 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2993 { 3, { 16, -2, 16 }, 0 },
2994 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
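/* Worked example for the tables above: entry 13 of ashl_lshr_seq is
   { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, i.e. a shift by 13 is done as
   shifts by 8, 2, 1 and 2 (8 + 2 + 1 + 2 = 13); only the logical right
   shift variant clobbers T, because a 1 bit right shift clobbers the
   T bit while a 1 bit left shift is done with an add.  Negative amounts
   such as in entry 14, { 3, { 8, -2, 8 } }, mean a shift in the opposite
   direction, so a left shift by 14 becomes left 8, right 2, left 8.  */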
2997 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
2998 will clobber the T bit. */
2999 bool
3000 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3002 gcc_assert (CONST_INT_P (shift_amount));
3004 const int shift_amount_i = INTVAL (shift_amount) & 31;
3006 /* Special case for shift count of 31: use and-rotl sequence. */
3007 if (shift_amount_i == 31)
3008 return true;
3010 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3011 & ASHL_CLOBBERS_T) != 0;
3014 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3015 instructions will clobber the T bit. */
3016 bool
3017 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3019 gcc_assert (CONST_INT_P (shift_amount));
3021 /* For right shifts the constant might be negative. */
3022 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3024 /* Special case for shift count of 31: use shll-movt sequence. */
3025 if (shift_amount_i == 31)
3026 return true;
3028 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3029 & LSHR_CLOBBERS_T) != 0;
3032 /* Return true if it is potentially beneficial to use a dynamic shift
3033 instruction (shad / shar) instead of a combination of 1/2/8/16
3034 shift instructions for the specified shift count.
3035 If dynamic shifts are not available, always return false. */
3036 bool
3037 sh_dynamicalize_shift_p (rtx count)
3039 gcc_assert (CONST_INT_P (count));
3041 /* For right shifts the constant might be negative. */
3042 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3043 int insn_count;
3045 /* For left and right shifts, there are shorter 2 insn sequences for
3046 shift amounts of 31. */
3047 if (shift_amount_i == 31)
3048 insn_count = 2;
3049 else
3050 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3052 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
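/* Example of the heuristic above, assuming SH_DYNAMIC_SHIFT_COST is 1
   on targets with dynamic shifts: a shift by 13 needs 4 constant shift
   insns, which is more than 1 + 1, so shad/shld is preferred; a shift
   by 16 is a single insn and stays as it is.  */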
3055 /* Assuming we have a value that has been sign-extended by at least one bit,
3056 can we use ext_ashl_lshr_seq with the last shift turned to an
3057 arithmetic shift to shift it by N without data loss, and quicker than by
3058 other means? */
3059 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3061 /* Return the cost of a shift. */
3062 static inline int
3063 shiftcosts (rtx x)
3065 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3067 if (GET_MODE (x) == DImode
3068 && CONST_INT_P (XEXP (x, 1))
3069 && INTVAL (XEXP (x, 1)) == 1)
3070 return 2;
3072 /* Everything else is invalid, because there is no pattern for it. */
3073 return -1;
3075 /* If shifting by a non-constant amount, this will be expensive. */
3076 if (!CONST_INT_P (XEXP (x, 1)))
3077 return SH_DYNAMIC_SHIFT_COST;
3079 /* Otherwise, return the true cost in instructions. Cope with out of range
3080 shift counts more or less arbitrarily. */
3081 int value = INTVAL (XEXP (x, 1)) & 31;
3083 if (GET_CODE (x) == ASHIFTRT)
3085 int cost = ashiftrt_insns[value];
3086 /* If dynamic shifts are available and profitable in this case, then we
3087 put the constant in a reg and use shad. */
3088 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3089 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3090 return cost;
3092 else
3093 return ashl_lshr_seq[value].insn_count;
3096 /* Return the cost of an AND/XOR/IOR operation. */
3097 static inline int
3098 and_xor_ior_costs (rtx x, int code)
3100 /* On SH1-4 we have only max. SImode operations.
3101 Double the cost for modes > SImode. */
3102 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3104 /* A logical operation with two registers is a single cycle
3105 instruction. */
3106 if (!CONST_INT_P (XEXP (x, 1)))
3107 return 1 * cost_scale;
3109 int i = INTVAL (XEXP (x, 1));
3111 /* These constants are single cycle extu.[bw] instructions. */
3112 if ((i == 0xff || i == 0xffff) && code == AND)
3113 return 1 * cost_scale;
3114 /* Constants that can be used in an instruction as an immediate are
3115 a single cycle, but this requires r0, so make it a little more
3116 expensive. */
3117 if (CONST_OK_FOR_K08 (i))
3118 return 2 * cost_scale;
3119 /* Constants that can be loaded with a mov immediate need one more cycle.
3120 This case is probably unnecessary. */
3121 if (CONST_OK_FOR_I08 (i))
3122 return 2 * cost_scale;
3123 /* Any other constant requires an additional 2 cycle pc-relative load.
3124 This case is probably unnecessary. */
3125 return 3 * cost_scale;
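/* Example costs produced above (SImode): `x & 0xff' is a single extu.b,
   cost 1; `x & 0x3f' can use an 8 bit logical immediate (K08) but ties
   up r0, cost 2; any other constant such as 0x12345 needs a constant
   pool load first, cost 3.  */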
3128 /* Return the cost of an addition or a subtraction. */
3129 static inline int
3130 addsubcosts (rtx x)
3132 if (GET_MODE (x) == SImode)
3134 /* The addc or subc patterns will eventually become one or two
3135 instructions. Below are some costs for some of the patterns
3136 which combine would reject because the costs of the individual
3137 insns in the patterns are lower.
3139 FIXME: It would be much easier if we had something like insn cost
3140 attributes and the cost calculation machinery used those attributes
3141 in the first place. This would eliminate redundant recog-like C
3142 code to calculate costs of complex patterns. */
3143 rtx op0 = XEXP (x, 0);
3144 rtx op1 = XEXP (x, 1);
3146 if (GET_CODE (x) == PLUS)
3148 if (GET_CODE (op0) == AND
3149 && XEXP (op0, 1) == const1_rtx
3150 && (GET_CODE (op1) == PLUS
3151 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3152 return 1;
3154 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3155 && GET_CODE (op1) == LSHIFTRT
3156 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3157 return 1;
3159 /* Let's assume that adding the result of an insn that stores into
3160 the T bit is cheap. */
3161 if (treg_set_expr (op1, SImode))
3162 return 1;
3163 if (treg_set_expr (op0, SImode))
3164 return 1;
3167 /* On SH1-4 we have only max. SImode operations.
3168 Double the cost for modes > SImode. */
3169 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3171 /* Adding a register is a single cycle insn. */
3172 if (REG_P (XEXP (x, 1))
3173 || GET_CODE (XEXP (x, 1)) == SUBREG)
3174 return 1 * cost_scale;
3176 /* Likewise for small constants. */
3177 if (CONST_INT_P (XEXP (x, 1))
3178 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3179 return 1 * cost_scale;
3181 /* Any other constant requires a 2 cycle pc-relative load plus an
3182 addition. */
3183 return 3 * cost_scale;
3186 /* Return the cost of a multiply. */
3187 static inline int
3188 multcosts (rtx x ATTRIBUTE_UNUSED)
3190 if (sh_multcost >= 0)
3191 return sh_multcost;
3193 if (TARGET_SH2)
3195 /* We have a mul insn, so we can never take more than the mul and the
3196 read of the mac reg, but count more because of the latency and extra
3197 reg usage. */
3198 if (optimize_size)
3199 return 2;
3200 return 3;
3203 /* If we're aiming at small code, then just count the number of
3204 insns in a multiply call sequence. */
3205 if (optimize_size)
3206 return 5;
3208 /* Otherwise count all the insns in the routine we'd be calling too. */
3209 return 20;
3212 /* Compute a (partial) cost for rtx X. Return true if the complete
3213 cost has been computed, and false if subexpressions should be
3214 scanned. In either case, *TOTAL contains the cost result. */
3215 static bool
3216 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3217 int opno ATTRIBUTE_UNUSED,
3218 int *total, bool speed ATTRIBUTE_UNUSED)
3220 int code = GET_CODE (x);
3222 switch (code)
3224 /* The lower-subreg pass decides whether to split multi-word regs
3225 into individual regs by looking at the cost for a SET of certain
3226 modes with the following patterns:
3227 (set (reg) (reg))
3228 (set (reg) (const_int 0))
3229 On machines that support vector-move operations a multi-word move
3230 is the same cost as individual reg move. On SH there is no
3231 vector-move, so we have to provide the correct cost in the number
3232 of move insns to load/store the reg of the mode in question. */
3233 case SET:
3234 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3236 *total = COSTS_N_INSNS (1);
3237 return true;
3240 if (register_operand (SET_DEST (x), VOIDmode)
3241 && (register_operand (SET_SRC (x), VOIDmode)
3242 || satisfies_constraint_Z (SET_SRC (x))))
3244 const machine_mode mode = GET_MODE (SET_DEST (x));
3245 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3246 / mov_insn_size (mode, TARGET_SH2A));
3247 return true;
3249 return false;
3251 /* The cost of a mem access is mainly the cost of the address mode. */
3252 case MEM:
3253 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3254 true);
3255 return true;
3257 case IF_THEN_ELSE:
3258 /* This case is required for the if_then_else negc pattern. */
3259 if (treg_set_expr (XEXP (x, 0), SImode))
3261 *total = COSTS_N_INSNS (1);
3262 return true;
3264 else
3265 return false;
3267 /* Zero extracts of single bits are usually combine patterns for the
3268 tst insns. */
3269 case ZERO_EXTRACT:
3270 if (GET_CODE (XEXP (x, 0)) == XOR
3271 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3272 && XEXP (x, 1) == const1_rtx
3273 && CONST_INT_P (XEXP (x, 2))
3274 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3275 /* Check that the xor constant overlaps with the extracted bit. */
3276 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3278 *total = 1; //COSTS_N_INSNS (1);
3279 return true;
3282 /* div0s variant. */
3283 if (GET_CODE (XEXP (x, 0)) == XOR
3284 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3285 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3287 *total = 1;
3288 return true;
3290 return false;
3292 /* The cost of a sign or zero extend depends on whether the source is a
3293 reg or a mem. In case of a mem take the address into account. */
3294 case SIGN_EXTEND:
3295 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3297 *total = COSTS_N_INSNS (1);
3298 return true;
3300 if (MEM_P (XEXP (x, 0)))
3302 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3303 GET_MODE (XEXP (x, 0)),
3304 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3305 return true;
3307 return false;
3309 case ZERO_EXTEND:
3310 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3312 *total = COSTS_N_INSNS (1);
3313 return true;
3315 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3316 && (GET_MODE (XEXP (x, 0)) == QImode
3317 || GET_MODE (XEXP (x, 0)) == HImode))
3319 /* Handle SH2A's movu.b and movu.w insn. */
3320 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3321 GET_MODE (XEXP (x, 0)),
3322 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3323 return true;
3325 return false;
3327 /* mems for SFmode and DFmode can be inside a parallel due to
3328 the way the fpscr is handled. */
3329 case PARALLEL:
3330 for (int i = 0; i < XVECLEN (x, 0); i++)
3332 rtx xx = XVECEXP (x, 0, i);
3333 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3335 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3336 GET_MODE (XEXP (xx, 0)),
3337 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3338 return true;
3340 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3342 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3343 GET_MODE (XEXP (xx, 1)),
3344 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3345 return true;
3349 if (sh_1el_vec (x, VOIDmode))
3350 *total = outer_code != SET;
3351 else if (sh_rep_vec (x, VOIDmode))
3352 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3353 + (outer_code != SET));
3354 else
3355 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3356 return true;
3358 case CONST_INT:
3359 if (CONST_OK_FOR_I08 (INTVAL (x)))
3360 *total = 0;
3361 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3362 && CONST_OK_FOR_K08 (INTVAL (x)))
3363 *total = 1;
3364 /* prepare_cmp_insn will force costly constants into registers before
3365 the cbranch[sd]i4 patterns can see them, so preserve potentially
3366 interesting ones not covered by I08 above. */
3367 else if (outer_code == COMPARE
3368 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3369 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3370 || INTVAL (x) == 0x7fffffff
3371 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3372 *total = 1;
3373 else
3374 *total = 8;
3375 return true;
3377 case EQ:
3378 /* An and with a constant compared against zero is
3379 most likely going to be a TST #imm, R0 instruction. */
3380 if (XEXP (x, 1) == const0_rtx
3381 && ((GET_CODE (XEXP (x, 0)) == AND
3382 || (SUBREG_P (XEXP (x, 0))
3383 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3384 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3386 *total = 1;
3387 return true;
3390 else if (XEXP (x, 1) == const0_rtx
3391 && GET_CODE (XEXP (x, 0)) == AND
3392 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3393 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3394 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3395 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3397 *total = 1;
3398 return true;
3400 else
3401 return false;
3403 case SMIN:
3404 case SMAX:
3405 /* This is most likely a clips.b or clips.w insn that is being made up
3406 by combine. */
3407 if (TARGET_SH2A
3408 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3409 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3410 && REG_P (XEXP (XEXP (x, 0), 0))
3411 && CONST_INT_P (XEXP (x, 1)))
3413 *total = COSTS_N_INSNS (1);
3414 return true;
3416 else
3417 return false;
3419 case CONST:
3420 case LABEL_REF:
3421 case SYMBOL_REF:
3422 *total = 5;
3423 return true;
3425 case CONST_DOUBLE:
3426 /* prepare_cmp_insn will force costly constants into registers before
3427 the cbranchdi4 pattern can see them, so preserve potentially
3428 interesting ones. */
3429 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3430 *total = 1;
3431 else
3432 *total = 10;
3433 return true;
3435 case CONST_VECTOR:
3436 /* FIXME: This looks broken. Only the last statement has any effect.
3437 Probably this could be folded with the PARALLEL case? */
3438 if (x == CONST0_RTX (GET_MODE (x)))
3439 *total = 0;
3440 else if (sh_1el_vec (x, VOIDmode))
3441 *total = outer_code != SET;
3442 if (sh_rep_vec (x, VOIDmode))
3443 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3444 + (outer_code != SET));
3445 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3446 return true;
3448 case PLUS:
3449 case MINUS:
3450 *total = COSTS_N_INSNS (addsubcosts (x));
3451 return true;
3453 case AND:
3454 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3455 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3457 *total = COSTS_N_INSNS (1);
3458 return true;
3460 /* Fall through. */
3462 case XOR:
3463 case IOR:
3464 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3465 return true;
3467 case MULT:
3468 *total = COSTS_N_INSNS (multcosts (x));
3469 return true;
3471 case LT:
3472 case GE:
3473 /* div0s sign comparison. */
3474 if (GET_CODE (XEXP (x, 0)) == XOR
3475 && REG_P ((XEXP (XEXP (x, 0), 0)))
3476 && REG_P ((XEXP (XEXP (x, 0), 1)))
3477 && satisfies_constraint_Z (XEXP (x, 1)))
3479 *total = COSTS_N_INSNS (1);
3480 return true;
3482 else
3483 return false;
3485 case LSHIFTRT:
3486 /* div0s sign comparison. */
3487 if (GET_CODE (XEXP (x, 0)) == XOR
3488 && REG_P ((XEXP (XEXP (x, 0), 0)))
3489 && REG_P ((XEXP (XEXP (x, 0), 1)))
3490 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3492 *total = COSTS_N_INSNS (1);
3493 return true;
3495 /* FALLTHRU */
3496 case ASHIFT:
3497 case ASHIFTRT:
3499 int cost = shiftcosts (x);
3500 if (cost < 0)
3501 return false;
3502 *total = COSTS_N_INSNS (cost);
3503 return true;
3506 case DIV:
3507 case UDIV:
3508 case MOD:
3509 case UMOD:
3510 *total = COSTS_N_INSNS (20);
3511 return true;
3513 case FLOAT:
3514 case FIX:
3515 *total = 100;
3516 return true;
3518 default:
3519 return false;
3523 /* Determine the size of the fundamental move insn that will be used
3524 for the specified mode. */
3525 static inline int
3526 mov_insn_size (machine_mode mode, bool consider_sh2a)
3528 const int mode_sz = GET_MODE_SIZE (mode);
3530 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3531 || (TARGET_FMOVD && mode == DFmode))
3532 return mode_sz;
3533 else
3535 /* The max. available mode for actual move insns is SImode.
3536 Larger accesses will be split into multiple loads/stores. */
3537 const int max_mov_sz = GET_MODE_SIZE (SImode);
3538 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3542 /* Determine the maximum possible displacement for a move insn for the
3543 specified mode. */
3545 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3547 /* The 4 byte displacement move insns are the same as the 2 byte
3548 versions but take a 12 bit displacement. All we need to do is to
3549 scale the max. displacement value accordingly. */
3550 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3552 /* SH2A supports FPU move insns with 12 bit displacements.
3553 Other variants do not support any kind of displacements for
3554 FPU move insns. */
3555 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3556 return 0;
3557 else
3559 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3560 const int mode_sz = GET_MODE_SIZE (mode);
3561 int r = 15 * mov_insn_sz * disp_scale;
3563 /* If the mov insn will be split into multiple loads/stores, the
3564 maximum possible displacement is a bit smaller. */
3565 if (mode_sz > mov_insn_sz)
3566 r -= mode_sz - mov_insn_sz;
3567 return r;
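/* Worked example: for SImode without SH2A displacements the move insn
   size is 4 and the scale is 1, so the maximum displacement is
   15 * 4 = 60 bytes; with consider_sh2a it is 15 * 4 * (4095 / 15)
   = 16380, i.e. the 12 bit displacement scaled by the access size.
   For QImode the limits are 15 and 4095 respectively.  */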
3571 /* Determine the alignment mask for a move insn of the
3572 specified mode. */
3573 static inline int
3574 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3576 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3577 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3580 /* Return the displacement value of a displacement address. */
3581 HOST_WIDE_INT
3582 sh_disp_addr_displacement (rtx x)
3584 gcc_assert (satisfies_constraint_Sdd (x));
3585 return INTVAL (XEXP (XEXP (x, 0), 1));
3588 /* Compute the cost of an address. */
3589 static int
3590 sh_address_cost (rtx x, machine_mode mode,
3591 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3593 /* 'GBR + 0'. Account one more because of R0 restriction. */
3594 if (REG_P (x) && REGNO (x) == GBR_REG)
3595 return 2;
3597 /* Simple reg, post-inc, pre-dec addressing. */
3598 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3599 return 1;
3601 /* 'reg + disp' addressing. */
3602 if (GET_CODE (x) == PLUS
3603 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3605 /* 'GBR + disp'. Account one more because of R0 restriction. */
3606 if (REGNO (XEXP (x, 0)) == GBR_REG
3607 && gbr_displacement (XEXP (x, 1), mode))
3608 return 2;
3610 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3612 if (offset == 0)
3613 return 1;
3615 /* The displacement would fit into a 2 byte move insn.
3616 HImode and QImode loads/stores with displacement put pressure on
3617 R0 which will most likely require another reg copy. Thus account
3618 a higher cost for that. */
3619 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3620 return (mode == HImode || mode == QImode) ? 2 : 1;
3622 /* The displacement would fit into a 4 byte move insn (SH2A). */
3623 if (TARGET_SH2A
3624 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3625 return 2;
3627 /* The displacement is probably out of range and will require extra
3628 calculations. */
3629 return 3;
3632 /* 'reg + reg' addressing. Account a slightly higher cost because of
3633 increased pressure on R0. */
3634 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3635 return 3;
3637 /* Not sure what it is - probably expensive. */
3638 return 10;
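/* Costs implied by the rules above for some typical addresses
   (an illustrative summary, not used by the code):
     @r1               -> 1   plain register
     @r1+ / @-r1       -> 1   post-increment / pre-decrement
     @(0,gbr)          -> 2   GBR base, accounts for the R0 restriction
     @(8,r1)  SImode   -> 1   fits a 2 byte mov with displacement
     @(2,r1)  QImode   -> 2   QI/HI displacement moves tie up R0
     @(r0,r1)          -> 3   reg + reg indexing, more R0 pressure.  */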
3641 /* Code to expand a shift. */
3642 static void
3643 gen_ashift (int type, int n, rtx reg)
3645 rtx n_rtx;
3647 /* Negative values here come from the shift_amounts array. */
3648 if (n < 0)
3650 if (type == ASHIFT)
3651 type = LSHIFTRT;
3652 else
3653 type = ASHIFT;
3654 n = -n;
3657 n_rtx = GEN_INT (n);
3658 gcc_assert (satisfies_constraint_P27 (n_rtx));
3660 switch (type)
3662 case ASHIFTRT:
3663 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3664 break;
3665 case LSHIFTRT:
3666 if (n == 1)
3667 emit_insn (gen_shlr (reg, reg));
3668 else
3669 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3670 break;
3671 case ASHIFT:
3672 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3673 break;
3674 default:
3675 gcc_unreachable ();
3679 /* Code to expand a HImode shift. */
3680 static void
3681 gen_ashift_hi (int type, int n, rtx reg)
3683 /* Negative values here come from the shift_amounts array. */
3684 if (n < 0)
3686 if (type == ASHIFT)
3687 type = LSHIFTRT;
3688 else
3689 type = ASHIFT;
3690 n = -n;
3693 switch (type)
3695 case ASHIFTRT:
3696 case LSHIFTRT:
3697 /* We don't have HImode right shift operations because using the
3698 ordinary 32 bit shift instructions for that doesn't generate proper
3699 zero/sign extension.
3700 gen_ashift_hi is only called in contexts where we know that the
3701 sign extension works out correctly. */
3703 int offset = 0;
3704 if (GET_CODE (reg) == SUBREG)
3706 offset = SUBREG_BYTE (reg);
3707 reg = SUBREG_REG (reg);
3709 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3710 break;
3712 case ASHIFT:
3713 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3714 break;
3718 /* Output RTL to split a constant shift into its component SH constant
3719 shift instructions. */
3720 void
3721 gen_shifty_op (int code, rtx *operands)
3723 int value = INTVAL (operands[2]);
3724 int max, i;
3726 /* Truncate the shift count in case it is out of bounds. */
3727 value = value & 31;
3729 if (value == 31)
3731 if (code == LSHIFTRT)
3733 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3734 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3735 return;
3737 else if (code == ASHIFT)
3739 /* There is a two instruction sequence for 31 bit left shifts,
3740 but it requires r0. */
3741 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3743 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3744 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3745 return;
3749 else if (value == 0)
3751 /* This can happen even when optimizing, if there were subregs before
3752 reload. Don't output a nop here, as this is never optimized away;
3753 use a no-op move instead. */
3754 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3755 return;
3758 max = ashl_lshr_seq[value].insn_count;
3759 for (i = 0; i < max; i++)
3760 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
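/* Illustrative sketch, not the ashl_lshr_seq table used above: single shift
   insns only exist for the amounts 1, 2, 8 and 16 (see the P27 constraint
   check in gen_ashift), so a constant shift in the range 0..31 is composed
   from those steps.  A naive greedy decomposition looks like this; the real
   table also uses "overshoot and shift back" sequences, i.e. negative
   amounts.  */
static int
greedy_shift_decomposition (int amount /* 0..31 */, int parts[6])
{
  static const int steps[] = { 16, 8, 2, 1 };
  int n = 0;
  for (unsigned int i = 0; i < sizeof (steps) / sizeof (steps[0]); i++)
    while (amount >= steps[i])
      {
        parts[n++] = steps[i];
        amount -= steps[i];
      }
  return n;   /* e.g. 21 -> 16 + 2 + 2 + 1, i.e. four shift insns.  */
}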
3763 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3764 don't matter. */
3765 void
3766 gen_shifty_hi_op (int code, rtx *operands)
3768 int value = INTVAL (operands[2]);
3769 int max, i;
3770 void (*gen_fun) (int, int, rtx);
3772 /* This operation is used by and_shl for SImode values with a few
3773 high bits known to be cleared. */
3774 value &= 31;
3775 if (value == 0)
3777 emit_insn (gen_nop ());
3778 return;
3781 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3782 if (code == ASHIFT)
3784 max = ext_ashl_lshr_seq[value].insn_count;
3785 for (i = 0; i < max; i++)
3786 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3788 else
3789 /* When shifting right, emit the shifts in reverse order, so that
3790 solitary negative values come first. */
3791 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3792 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3795 /* Output RTL for an arithmetic right shift.
3796 ??? Rewrite to use super-optimizer sequences. */
3797 bool
3798 expand_ashiftrt (rtx *operands)
3800 rtx wrk;
3801 char func[18];
3802 int value;
3804 if (TARGET_DYNSHIFT)
3806 if (!CONST_INT_P (operands[2]))
3808 rtx count = copy_to_mode_reg (SImode, operands[2]);
3809 emit_insn (gen_negsi2 (count, count));
3810 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3811 return true;
3813 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3814 > 1 + SH_DYNAMIC_SHIFT_COST)
3816 rtx count
3817 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3818 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3819 return true;
3822 if (!CONST_INT_P (operands[2]))
3823 return false;
3825 value = INTVAL (operands[2]) & 31;
3827 if (value == 31)
3829 /* If we are called from abs expansion, arrange things so that we
3830 can use a single MT instruction that doesn't clobber the source,
3831 if LICM can hoist out the load of the constant zero. */
3832 if (currently_expanding_to_rtl)
3834 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3835 operands[1]));
3836 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3837 return true;
3839 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3840 return true;
3842 else if (value >= 16 && value <= 19)
3844 wrk = gen_reg_rtx (SImode);
3845 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3846 value -= 16;
3847 while (value--)
3848 gen_ashift (ASHIFTRT, 1, wrk);
3849 emit_move_insn (operands[0], wrk);
3850 return true;
3852 /* Expand a short sequence inline; for a longer one, call a magic routine. */
3853 else if (value <= 5)
3855 wrk = gen_reg_rtx (SImode);
3856 emit_move_insn (wrk, operands[1]);
3857 while (value--)
3858 gen_ashift (ASHIFTRT, 1, wrk);
3859 emit_move_insn (operands[0], wrk);
3860 return true;
3863 wrk = gen_reg_rtx (Pmode);
3865 /* Load the value into an arg reg and call a helper. */
3866 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3867 sprintf (func, "__ashiftrt_r4_%d", value);
3868 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3869 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3870 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3871 return true;
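/* The identity behind the value == 31 case above, as a sketch (assumes
   32-bit int and an arithmetic right shift): shifting right by 31 keeps
   only the sign, so the result is -1 for negative inputs and 0 otherwise,
   which is why a compare against zero plus a single "move negated T" insn
   is enough.  */
static inline int
ashiftrt_31_example (int x)
{
  return -(x < 0);      /* equals x >> 31 with an arithmetic shift.  */
}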
3874 /* Try to find a good way to implement the combiner pattern
3875 [(set (match_operand:SI 0 "register_operand" "r")
3876 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3877 (match_operand:SI 2 "const_int_operand" "n"))
3878 (match_operand:SI 3 "const_int_operand" "n"))) .
3879 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3880 return 0 for simple right / left or left/right shift combination.
3881 return 1 for a combination of shifts with zero_extend.
3882 return 2 for a combination of shifts with an AND that needs r0.
3883 return 3 for a combination of shifts with an AND that needs an extra
3884 scratch register, when the three highmost bits of the AND mask are clear.
3885 return 4 for a combination of shifts with an AND that needs an extra
3886 scratch register, when any of the three highmost bits of the AND mask
3887 is set.
3888 If ATTRP is set, store an initial right shift width in ATTRP[0],
3889 and the instruction length in ATTRP[1] . These values are not valid
3890 when returning 0.
3891 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3892 shift_amounts for the last shift value that is to be used before the
3893 sign extend. */
3894 int
3895 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3897 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3898 int left = INTVAL (left_rtx), right;
3899 int best = 0;
3900 int cost, best_cost = 10000;
3901 int best_right = 0, best_len = 0;
3902 int i;
3903 int can_ext;
3905 if (left < 0 || left > 31)
3906 return 0;
3907 if (CONST_INT_P (mask_rtx))
3908 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3909 else
3910 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3911 /* Can this be expressed as a right shift / left shift pair? */
3912 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3913 right = exact_log2 (lsb);
3914 mask2 = ~(mask + lsb - 1);
3915 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3916 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
3917 if (! mask2)
3918 best_cost = ashl_lshr_seq[right].insn_count
3919 + ashl_lshr_seq[right + left].insn_count;
3920 /* mask has no trailing zeroes <==> ! right */
3921 else if (! right && mask2 == ~(lsb2 - 1))
3923 int late_right = exact_log2 (lsb2);
3924 best_cost = ashl_lshr_seq[left + late_right].insn_count
3925 + ashl_lshr_seq[late_right].insn_count;
3927 /* Try to use zero extend. */
3928 if (mask2 == ~(lsb2 - 1))
3930 int width, first;
3932 for (width = 8; width <= 16; width += 8)
3934 /* Can we zero-extend right away? */
3935 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3937 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3938 + ext_ashl_lshr_seq[left + right].insn_count;
3939 if (cost < best_cost)
3941 best = 1;
3942 best_cost = cost;
3943 best_right = right;
3944 best_len = cost;
3945 if (attrp)
3946 attrp[2] = -1;
3948 continue;
3950 /* ??? Could try to put zero extend into initial right shift,
3951 or even shift a bit left before the right shift. */
3952 /* Determine value of first part of left shift, to get to the
3953 zero extend cut-off point. */
3954 first = width - exact_log2 (lsb2) + right;
3955 if (first >= 0 && right + left - first >= 0)
3957 cost = ext_ashl_lshr_seq[right].insn_count
3958 + ext_ashl_lshr_seq[first].insn_count + 1
3959 + ext_ashl_lshr_seq[right + left - first].insn_count;
3961 if (cost < best_cost)
3963 best = 1;
3964 best_cost = cost;
3965 best_right = right;
3966 best_len = cost;
3967 if (attrp)
3968 attrp[2] = first;
3973 /* Try to use r0 AND pattern */
3974 for (i = 0; i <= 2; i++)
3976 if (i > right)
3977 break;
3978 if (! CONST_OK_FOR_K08 (mask >> i))
3979 continue;
3980 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
3981 if (cost < best_cost)
3983 best = 2;
3984 best_cost = cost;
3985 best_right = i;
3986 best_len = cost - 1;
3989 /* Try to use a scratch register to hold the AND operand. */
3990 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3991 for (i = 0; i <= 2; i++)
3993 if (i > right)
3994 break;
3995 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3996 + (can_ext
3997 ? ext_ashl_lshr_seq
3998 : ashl_lshr_seq)[left + i].insn_count;
3999 if (cost < best_cost)
4001 best = 4 - can_ext;
4002 best_cost = cost;
4003 best_right = i;
4004 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4008 if (attrp)
4010 attrp[0] = best_right;
4011 attrp[1] = best_len;
4013 return best;
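/* Two of the rewrites considered above, written as plain C identities
   (illustrative only; 32-bit unsigned arithmetic assumed):
     contiguous mask -> right shift / left shift pair:
        (x << 2) & 0xfffffff0  ==  (x >> 2) << 4
     byte-wide mask -> shifts around a zero extension (extu.b):
        (x << 8) & 0x0000ff00  ==  (x & 0xff) << 8.  */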
4016 /* This is used in length attributes of the unnamed instructions
4017 corresponding to shl_and_kind return values of 1 and 2. */
4018 int
4019 shl_and_length (rtx insn)
4021 rtx set_src, left_rtx, mask_rtx;
4022 int attributes[3];
4024 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4025 left_rtx = XEXP (XEXP (set_src, 0), 1);
4026 mask_rtx = XEXP (set_src, 1);
4027 shl_and_kind (left_rtx, mask_rtx, attributes);
4028 return attributes[1];
4031 /* This is used in length attribute of the and_shl_scratch instruction. */
4032 int
4033 shl_and_scr_length (rtx insn)
4035 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4036 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4037 rtx op = XEXP (set_src, 0);
4038 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4039 op = XEXP (XEXP (op, 0), 0);
4040 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4043 /* Generate rtl for instructions for which shl_and_kind advised a particular
4044 method of generating them, i.e. returned a nonzero kind. Return false on success. */
4045 bool
4046 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4048 int attributes[3];
4049 unsigned HOST_WIDE_INT mask;
4050 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4051 int right, total_shift;
4052 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4054 right = attributes[0];
4055 total_shift = INTVAL (left_rtx) + right;
4056 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4057 switch (kind)
4059 default:
4060 return true;
4061 case 1:
4063 int first = attributes[2];
4064 rtx operands[3];
4066 if (first < 0)
4068 emit_insn ((mask << right) <= 0xff
4069 ? gen_zero_extendqisi2 (dest,
4070 gen_lowpart (QImode, source))
4071 : gen_zero_extendhisi2 (dest,
4072 gen_lowpart (HImode, source)));
4073 source = dest;
4075 if (source != dest)
4076 emit_insn (gen_movsi (dest, source));
4077 operands[0] = dest;
4078 if (right)
4080 operands[2] = GEN_INT (right);
4081 gen_shifty_hi_op (LSHIFTRT, operands);
4083 if (first > 0)
4085 operands[2] = GEN_INT (first);
4086 gen_shifty_hi_op (ASHIFT, operands);
4087 total_shift -= first;
4088 mask <<= first;
4090 if (first >= 0)
4091 emit_insn (mask <= 0xff
4092 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4093 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4094 if (total_shift > 0)
4096 operands[2] = GEN_INT (total_shift);
4097 gen_shifty_hi_op (ASHIFT, operands);
4099 break;
4101 case 4:
4102 shift_gen_fun = gen_shifty_op;
4103 /* FALLTHRU */
4104 case 3:
4105 /* If the topmost bit that matters is set, set the topmost bits
4106 that don't matter. This way, we might be able to get a shorter
4107 signed constant. */
4108 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4109 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
4110 /* FALLTHRU */
4111 case 2:
4112 /* Don't expand fine-grained when combining, because that will
4113 make the pattern fail. */
4114 if (currently_expanding_to_rtl
4115 || reload_in_progress || reload_completed)
4117 rtx operands[3];
4119 /* Cases 3 and 4 should be handled by this split
4120 only while combining */
4121 gcc_assert (kind <= 2);
4122 if (right)
4124 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4125 source = dest;
4127 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4128 if (total_shift)
4130 operands[0] = dest;
4131 operands[1] = dest;
4132 operands[2] = GEN_INT (total_shift);
4133 shift_gen_fun (ASHIFT, operands);
4135 break;
4137 else
4139 int neg = 0;
4140 if (kind != 4 && total_shift < 16)
4142 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4143 if (neg > 0)
4144 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4145 else
4146 neg = 0;
4148 emit_insn (gen_and_shl_scratch (dest, source,
4149 GEN_INT (right),
4150 GEN_INT (mask),
4151 GEN_INT (total_shift + neg),
4152 GEN_INT (neg)));
4153 emit_insn (gen_movsi (dest, dest));
4154 break;
4157 return false;
4160 /* Try to find a good way to implement the combiner pattern
4161 [(set (match_operand:SI 0 "register_operand" "=r")
4162 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4163 (match_operand:SI 2 "const_int_operand" "n")
4164 (match_operand:SI 3 "const_int_operand" "n")
4165 (const_int 0)))
4166 (clobber (reg:SI T_REG))]
4167 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4168 return 0 for simple left / right shift combination.
4169 return 1 for left shift / 8 bit sign extend / left shift.
4170 return 2 for left shift / 16 bit sign extend / left shift.
4171 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4172 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4173 return 5 for left shift / 16 bit sign extend / right shift
4174 return 6 for < 8 bit sign extend / left shift.
4175 return 7 for < 8 bit sign extend / left shift / single right shift.
4176 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4177 int
4178 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4180 int left, size, insize, ext;
4181 int cost = 0, best_cost;
4182 int kind;
4184 left = INTVAL (left_rtx);
4185 size = INTVAL (size_rtx);
4186 insize = size - left;
4187 gcc_assert (insize > 0);
4188 /* Default to left / right shift. */
4189 kind = 0;
4190 best_cost = ashl_lshr_seq[32 - insize].insn_count
4191 + ashl_lshr_seq[32 - size].insn_count;
4192 if (size <= 16)
4194 /* 16 bit shift / sign extend / 16 bit shift */
4195 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4196 + ashl_lshr_seq[16 - size].insn_count;
4197 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4198 below, by alternative 3 or something even better. */
4199 if (cost < best_cost)
4201 kind = 5;
4202 best_cost = cost;
4205 /* Try a plain sign extend between two shifts. */
4206 for (ext = 16; ext >= insize; ext -= 8)
4208 if (ext <= size)
4210 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4211 + ashl_lshr_seq[size - ext].insn_count;
4212 if (cost < best_cost)
4214 kind = ext / (unsigned) 8;
4215 best_cost = cost;
4218 /* Check if we can do a sloppy shift with a final signed shift
4219 restoring the sign. */
4220 if (EXT_SHIFT_SIGNED (size - ext))
4221 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4222 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4223 /* If not, maybe it's still cheaper to do the second shift sloppy,
4224 and do a final sign extend? */
4225 else if (size <= 16)
4226 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4227 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4228 + 1;
4229 else
4230 continue;
4231 if (cost < best_cost)
4233 kind = ext / (unsigned) 8 + 2;
4234 best_cost = cost;
4237 /* Check if we can sign extend in r0 */
4238 if (insize < 8)
4240 cost = 3 + ashl_lshr_seq[left].insn_count;
4241 if (cost < best_cost)
4243 kind = 6;
4244 best_cost = cost;
4246 /* Try the same with a final signed shift. */
4247 if (left < 31)
4249 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4250 if (cost < best_cost)
4252 kind = 7;
4253 best_cost = cost;
4257 if (TARGET_DYNSHIFT)
4259 /* Try to use a dynamic shift. */
4260 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4261 if (cost < best_cost)
4263 kind = 0;
4264 best_cost = cost;
4267 if (costp)
4268 *costp = cost;
4269 return kind;
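/* The default strategy (kind 0) above, written out as a plain C sketch;
   assumes 32-bit int and an arithmetic right shift for signed values.
   Sign extracting the low SIZE bits of (X << LEFT) is a shift up to the
   top of the register followed by a signed shift back down:  */
static inline int
shl_sext_kind0_example (int x, int left, int size)
{
  int insize = size - left;     /* number of bits of X that survive.  */
  return (int) ((unsigned int) x << (32 - insize)) >> (32 - size);
}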
4272 /* Function to be used in the length attribute of the instructions
4273 implementing this pattern. */
4274 int
4275 shl_sext_length (rtx insn)
4277 rtx set_src, left_rtx, size_rtx;
4278 int cost;
4280 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4281 left_rtx = XEXP (XEXP (set_src, 0), 1);
4282 size_rtx = XEXP (set_src, 1);
4283 shl_sext_kind (left_rtx, size_rtx, &cost);
4284 return cost;
4287 /* Generate rtl for the sign-extract combiner pattern described above. */
4288 bool
4289 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4291 int kind;
4292 int left, size, insize, cost;
4293 rtx operands[3];
4295 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4296 left = INTVAL (left_rtx);
4297 size = INTVAL (size_rtx);
4298 insize = size - left;
4299 switch (kind)
4301 case 1:
4302 case 2:
4303 case 3:
4304 case 4:
4306 int ext = kind & 1 ? 8 : 16;
4307 int shift2 = size - ext;
4309 /* Don't expand fine-grained when combining, because that will
4310 make the pattern fail. */
4311 if (! currently_expanding_to_rtl
4312 && ! reload_in_progress && ! reload_completed)
4314 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4315 emit_insn (gen_movsi (dest, source));
4316 break;
4318 if (dest != source)
4319 emit_insn (gen_movsi (dest, source));
4320 operands[0] = dest;
4321 if (ext - insize)
4323 operands[2] = GEN_INT (ext - insize);
4324 gen_shifty_hi_op (ASHIFT, operands);
4326 emit_insn (kind & 1
4327 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4328 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4329 if (kind <= 2)
4331 if (shift2)
4333 operands[2] = GEN_INT (shift2);
4334 gen_shifty_op (ASHIFT, operands);
4337 else
4339 if (shift2 > 0)
4341 if (EXT_SHIFT_SIGNED (shift2))
4343 operands[2] = GEN_INT (shift2 + 1);
4344 gen_shifty_op (ASHIFT, operands);
4345 operands[2] = const1_rtx;
4346 gen_shifty_op (ASHIFTRT, operands);
4347 break;
4349 operands[2] = GEN_INT (shift2);
4350 gen_shifty_hi_op (ASHIFT, operands);
4352 else if (shift2)
4354 operands[2] = GEN_INT (-shift2);
4355 gen_shifty_hi_op (LSHIFTRT, operands);
4357 emit_insn (size <= 8
4358 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4359 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4361 break;
4363 case 5:
4365 int i = 16 - size;
4366 if (! currently_expanding_to_rtl
4367 && ! reload_in_progress && ! reload_completed)
4368 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4369 else
4371 operands[0] = dest;
4372 operands[2] = GEN_INT (16 - insize);
4373 gen_shifty_hi_op (ASHIFT, operands);
4374 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4376 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4377 while (--i >= 0)
4378 gen_ashift (ASHIFTRT, 1, dest);
4379 break;
4381 case 6:
4382 case 7:
4383 /* Don't expand fine-grained when combining, because that will
4384 make the pattern fail. */
4385 if (! currently_expanding_to_rtl
4386 && ! reload_in_progress && ! reload_completed)
4388 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4389 emit_insn (gen_movsi (dest, source));
4390 break;
4392 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4393 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4394 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
4395 operands[0] = dest;
4396 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4397 gen_shifty_op (ASHIFT, operands);
4398 if (kind == 7)
4399 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4400 break;
4401 default:
4402 return true;
4404 return false;
4407 typedef struct label_ref_list_d
4409 rtx_code_label *label;
4410 struct label_ref_list_d *next;
4411 } *label_ref_list_t;
4413 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4414 ("label references list");
4416 /* The SH cannot load a large constant into a register; constants have to
4417 come from a pc relative load. The constant referenced by a pc relative
4418 load must lie less than 1k ahead of the load instruction. This
4419 means that we often have to dump a constant inside a function, and
4420 generate code to branch around it.
4422 It is important to minimize this, since the branches will slow things
4423 down and make things bigger.
4425 Worst case code looks like:
4427 mov.l L1,rn
4428 bra L2
4429 nop
4430 align
4431 L1: .long value
4432 L2:
4433 ..
4435 mov.l L3,rn
4436 bra L4
4437 nop
4438 align
4439 L3: .long value
4440 L4:
4441 ..
4443 We fix this by performing a scan before scheduling, which notices which
4444 instructions need to have their operands fetched from the constant table
4445 and builds the table.
4447 The algorithm is:
4449 scan, find an instruction which needs a pcrel move. Look forward, find the
4450 last barrier which is within MAX_COUNT bytes of the requirement.
4451 If there isn't one, make one. Process all the instructions between
4452 the find and the barrier.
4454 In the above example, we can tell that L3 is within 1k of L1, so
4455 the first move can be shrunk from the 3 insn+constant sequence into
4456 just 1 insn, and the constant moved to L3 to make:
4458 mov.l L1,rn
4459 ..
4460 mov.l L3,rn
4461 bra L4
4462 nop
4463 align
4464 L3:.long value
4465 L4:.long value
4467 Then the second move becomes the target for the shortening process. */
4469 typedef struct
4471 rtx value; /* Value in table. */
4472 rtx_code_label *label; /* Label of value. */
4473 label_ref_list_t wend; /* End of window. */
4474 machine_mode mode; /* Mode of value. */
4476 /* True if this constant is accessed as part of a post-increment
4477 sequence. Note that HImode constants are never accessed in this way. */
4478 bool part_of_sequence_p;
4479 } pool_node;
4481 /* The maximum number of constants that can fit into one pool, since
4482 constants in the range 0..510 are at least 2 bytes long, and in the
4483 range from there to 1018 at least 4 bytes. */
4485 #define MAX_POOL_SIZE 372
4486 static pool_node pool_vector[MAX_POOL_SIZE];
4487 static int pool_size;
4488 static rtx_code_label *pool_window_label;
4489 static int pool_window_last;
4491 static int max_labelno_before_reorg;
4493 /* ??? If we need a constant in HImode which is the truncated value of a
4494 constant we need in SImode, we could combine the two entries thus saving
4495 two bytes. Is this common enough to be worth the effort of implementing
4496 it? */
4498 /* ??? This stuff should be done at the same time that we shorten branches.
4499 As it is now, we must assume that all branches are the maximum size, and
4500 this causes us to almost always output constant pools sooner than
4501 necessary. */
4503 /* Add a constant to the pool and return its label. */
4504 static rtx_code_label *
4505 add_constant (rtx x, machine_mode mode, rtx last_value)
4507 rtx_code_label *lab, *new_rtx;
4508 label_ref_list_t ref, newref;
4510 /* First see if we've already got it. */
4511 for (int i = 0; i < pool_size; i++)
4513 if (x->code == pool_vector[i].value->code
4514 && mode == pool_vector[i].mode)
4516 if (x->code == CODE_LABEL)
4518 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4519 continue;
4521 if (rtx_equal_p (x, pool_vector[i].value))
4523 lab = new_rtx = 0;
4524 if (! last_value
4525 || ! i
4526 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4528 new_rtx = gen_label_rtx ();
4529 LABEL_REFS (new_rtx) = pool_vector[i].label;
4530 pool_vector[i].label = lab = new_rtx;
4532 if (lab && pool_window_label)
4534 newref = label_ref_list_d_pool.allocate ();
4535 newref->label = pool_window_label;
4536 ref = pool_vector[pool_window_last].wend;
4537 newref->next = ref;
4538 pool_vector[pool_window_last].wend = newref;
4540 if (new_rtx)
4541 pool_window_label = new_rtx;
4542 pool_window_last = i;
4543 return lab;
4548 /* Need a new one. */
4549 pool_vector[pool_size].value = x;
4550 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4552 lab = 0;
4553 pool_vector[pool_size - 1].part_of_sequence_p = true;
4555 else
4556 lab = gen_label_rtx ();
4557 pool_vector[pool_size].mode = mode;
4558 pool_vector[pool_size].label = lab;
4559 pool_vector[pool_size].wend = NULL;
4560 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4561 if (lab && pool_window_label)
4563 newref = label_ref_list_d_pool.allocate ();
4564 newref->label = pool_window_label;
4565 ref = pool_vector[pool_window_last].wend;
4566 newref->next = ref;
4567 pool_vector[pool_window_last].wend = newref;
4569 if (lab)
4570 pool_window_label = lab;
4571 pool_window_last = pool_size;
4572 pool_size++;
4573 return lab;
4576 /* Output the literal table. START, if nonzero, is the first instruction
4577 this table is needed for, and also indicates that there is at least one
4578 casesi_worker_2 instruction; we have to emit the operand3 labels from
4579 these insns at a 4-byte aligned position. BARRIER is the barrier
4580 after which we are to place the table. */
4581 static void
4582 dump_table (rtx_insn *start, rtx_insn *barrier)
4584 rtx_insn *scan = barrier;
4585 bool need_align = true;
4586 rtx lab;
4587 label_ref_list_t ref;
4588 bool have_df = false;
4590 /* Do two passes, first time dump out the HI sized constants. */
4592 for (int i = 0; i < pool_size; i++)
4594 pool_node *p = &pool_vector[i];
4596 if (p->mode == HImode)
4598 if (need_align)
4600 scan = emit_insn_after (gen_align_2 (), scan);
4601 need_align = false;
4603 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4604 scan = emit_label_after (lab, scan);
4605 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4606 scan);
4607 for (ref = p->wend; ref; ref = ref->next)
4609 lab = ref->label;
4610 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4613 else if (p->mode == DFmode)
4614 have_df = true;
4617 need_align = true;
4619 if (start)
4621 scan = emit_insn_after (gen_align_4 (), scan);
4622 need_align = false;
4623 for (; start != barrier; start = NEXT_INSN (start))
4624 if (NONJUMP_INSN_P (start)
4625 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4627 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4628 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4630 scan = emit_label_after (lab, scan);
4633 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4635 rtx_insn *align_insn = NULL;
4637 scan = emit_label_after (gen_label_rtx (), scan);
4638 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4639 need_align = false;
4641 for (int i = 0; i < pool_size; i++)
4643 pool_node *p = &pool_vector[i];
4645 switch (p->mode)
4647 case E_HImode:
4648 break;
4649 case E_SImode:
4650 case E_SFmode:
4651 if (align_insn && !p->part_of_sequence_p)
4653 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4654 emit_label_before (lab, align_insn);
4655 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4656 align_insn);
4657 for (ref = p->wend; ref; ref = ref->next)
4659 lab = ref->label;
4660 emit_insn_before (gen_consttable_window_end (lab),
4661 align_insn);
4663 delete_insn (align_insn);
4664 align_insn = NULL;
4665 continue;
4667 else
4669 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4670 scan = emit_label_after (lab, scan);
4671 scan = emit_insn_after (gen_consttable_4 (p->value,
4672 const0_rtx), scan);
4673 need_align = ! need_align;
4675 break;
4676 case E_DFmode:
4677 if (need_align)
4679 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4680 align_insn = scan;
4681 need_align = false;
4683 /* FALLTHRU */
4684 case E_DImode:
4685 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4686 scan = emit_label_after (lab, scan);
4687 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4688 scan);
4689 break;
4690 default:
4691 gcc_unreachable ();
4694 if (p->mode != HImode)
4696 for (ref = p->wend; ref; ref = ref->next)
4698 lab = ref->label;
4699 scan = emit_insn_after (gen_consttable_window_end (lab),
4700 scan);
4705 pool_size = 0;
4708 for (int i = 0; i < pool_size; i++)
4710 pool_node *p = &pool_vector[i];
4712 switch (p->mode)
4714 case E_HImode:
4715 break;
4716 case E_SImode:
4717 case E_SFmode:
4718 if (need_align)
4720 need_align = false;
4721 scan = emit_label_after (gen_label_rtx (), scan);
4722 scan = emit_insn_after (gen_align_4 (), scan);
4724 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4725 scan = emit_label_after (lab, scan);
4726 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4727 scan);
4728 break;
4729 case E_DFmode:
4730 case E_DImode:
4731 if (need_align)
4733 need_align = false;
4734 scan = emit_label_after (gen_label_rtx (), scan);
4735 scan = emit_insn_after (gen_align_4 (), scan);
4737 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4738 scan = emit_label_after (lab, scan);
4739 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4740 scan);
4741 break;
4742 default:
4743 gcc_unreachable ();
4746 if (p->mode != HImode)
4748 for (ref = p->wend; ref; ref = ref->next)
4750 lab = ref->label;
4751 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4756 scan = emit_insn_after (gen_consttable_end (), scan);
4757 scan = emit_barrier_after (scan);
4758 pool_size = 0;
4759 pool_window_label = NULL;
4760 pool_window_last = 0;
4763 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4765 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4767 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4768 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4769 need to fix it if the input value is CONST_OK_FOR_I08. */
4770 static bool
4771 broken_move (rtx_insn *insn)
4773 if (NONJUMP_INSN_P (insn))
4775 rtx pat = PATTERN (insn);
4776 if (GET_CODE (pat) == PARALLEL)
4777 pat = XVECEXP (pat, 0, 0);
4778 if (GET_CODE (pat) == SET
4779 /* We can load any 8-bit value if we don't care what the high
4780 order bits end up as. */
4781 && GET_MODE (SET_DEST (pat)) != QImode
4782 && (CONSTANT_P (SET_SRC (pat))
4783 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4784 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4785 /* Match mova_const. */
4786 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4787 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4788 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4789 && ! (TARGET_SH2E
4790 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4791 && (fp_zero_operand (SET_SRC (pat))
4792 || fp_one_operand (SET_SRC (pat)))
4793 /* In general we don't know the current setting of fpscr, so
4794 disable fldi.
4795 There is an exception if this was a register-register move
4796 before reload - and hence it was ascertained that we have
4797 single precision setting - and in a post-reload optimization
4798 we changed this to do a constant load. In that case
4799 we don't have an r0 clobber, hence we must use fldi. */
4800 && (TARGET_FMOVD
4801 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4802 == SCRATCH))
4803 && REG_P (SET_DEST (pat))
4804 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4805 && ! (TARGET_SH2A
4806 && GET_MODE (SET_DEST (pat)) == SImode
4807 && (satisfies_constraint_I20 (SET_SRC (pat))
4808 || satisfies_constraint_I28 (SET_SRC (pat))))
4809 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4810 return true;
4813 return false;
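/* Example of what broken_move is looking for (a sketch): mov #imm,Rn only
   takes an 8-bit sign-extended immediate, so
       mov      #100,r1         ! fits I08, left alone
   is fine, while a constant such as 0x12345678 has to become a pc relative
   load from the constant pool:
       mov.l    .L1,r1
       ...
   .L1:  .long  0x12345678  */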
4816 /* Return true if the specified insn is a mova insn. */
4817 static bool
4818 mova_p (rtx_insn *insn)
4820 return (NONJUMP_INSN_P (insn)
4821 && GET_CODE (PATTERN (insn)) == SET
4822 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4823 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4824 /* Don't match mova_const. */
4825 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4828 /* Fix up a mova from a switch that went out of range. */
4829 static void
4830 fixup_mova (rtx_insn *mova)
4832 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4833 if (! flag_pic)
4835 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4836 INSN_CODE (mova) = -1;
4838 else
4840 rtx_insn *worker = mova;
4841 rtx_code_label *lab = gen_label_rtx ();
4842 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4844 do
4846 worker = NEXT_INSN (worker);
4847 gcc_assert (worker
4848 && !LABEL_P (worker)
4849 && !JUMP_P (worker));
4850 } while (NOTE_P (worker)
4851 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4852 wpat = PATTERN (worker);
4853 wpat0 = XVECEXP (wpat, 0, 0);
4854 wpat1 = XVECEXP (wpat, 0, 1);
4855 wsrc = SET_SRC (wpat0);
4856 PATTERN (worker) = (gen_casesi_worker_2
4857 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4858 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4859 XEXP (wpat1, 0)));
4860 INSN_CODE (worker) = -1;
4861 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4862 base = gen_rtx_LABEL_REF (Pmode, lab);
4863 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4864 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4865 INSN_CODE (mova) = -1;
4869 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4870 *num_mova, and check that the new mova is not nested within the first one.
4871 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4872 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4873 static int
4874 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4876 int n_addr = 0; /* Initialization to shut up spurious warning. */
4877 int f_target, n_target = 0; /* Likewise. */
4879 if (optimize)
4881 /* If NEW_MOVA has no address yet, it will be handled later. */
4882 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4883 return -1;
4885 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4886 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4887 if (n_addr > n_target || n_addr + 1022 < n_target)
4889 /* Change the mova into a load.
4890 broken_move will then return true for it. */
4891 fixup_mova (new_mova);
4892 return 1;
4895 if (!(*num_mova)++)
4897 *first_mova = new_mova;
4898 return 2;
4900 if (!optimize
4901 || ((f_target
4902 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4903 >= n_target))
4904 return -1;
4906 (*num_mova)--;
4907 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4908 > n_target - n_addr)
4910 fixup_mova (*first_mova);
4911 return 0;
4913 else
4915 fixup_mova (new_mova);
4916 return 1;
4920 /* Find the last barrier from insn FROM which is close enough to hold the
4921 constant pool. If we can't find one, then create one near the end of
4922 the range. */
4923 static rtx_insn *
4924 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4926 int count_si = 0;
4927 int count_hi = 0;
4928 int found_hi = 0;
4929 int found_si = 0;
4930 int hi_align = 2;
4931 int si_align = 2;
4932 int leading_mova = num_mova;
4933 rtx_insn *barrier_before_mova = NULL;
4934 rtx_insn *found_barrier = NULL;
4935 rtx_insn *good_barrier = NULL;
4936 int si_limit;
4937 int hi_limit;
4938 rtx_insn *orig = from;
4939 rtx_insn *last_got = NULL;
4940 rtx_insn *last_symoff = NULL;
4942 /* For HImode: range is 510, add 4 because pc counts from address of
4943 second instruction after this one, subtract 2 for the jump instruction
4944 that we may need to emit before the table, subtract 2 for the instruction
4945 that fills the jump delay slot (in very rare cases, reorg will take an
4946 instruction from after the constant pool or will leave the delay slot
4947 empty). This gives 510.
4948 For SImode: range is 1020, add 4 because pc counts from address of
4949 second instruction after this one, subtract 2 in case pc is 2 byte
4950 aligned, subtract 2 for the jump instruction that we may need to emit
4951 before the table, subtract 2 for the instruction that fills the jump
4952 delay slot. This gives 1018. */
4954 /* The branch will always be shortened now that the reference address for
4955 forward branches is the successor address, thus we need no longer make
4956 adjustments to the [sh]i_limit for -O0. */
4958 si_limit = 1018;
4959 hi_limit = 510;
4961 while (from && count_si < si_limit && count_hi < hi_limit)
4963 int inc = get_attr_length (from);
4964 int new_align = 1;
4966 /* If this is a label that existed at the time of the compute_alignments
4967 call, determine the alignment. N.B. When find_barrier recurses for
4968 an out-of-reach mova, we might see labels at the start of previously
4969 inserted constant tables. */
4970 if (LABEL_P (from)
4971 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4973 if (optimize)
4974 new_align = 1 << label_to_alignment (from);
4975 else if (BARRIER_P (prev_nonnote_insn (from)))
4976 new_align = 1 << barrier_align (from);
4977 else
4978 new_align = 1;
4979 inc = 0;
4981 /* In case we are scanning a constant table because of recursion, check
4982 for explicit alignments. If the table is long, we might be forced
4983 to emit the new table in front of it; the length of the alignment
4984 might be the last straw. */
4985 else if (NONJUMP_INSN_P (from)
4986 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4987 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4988 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4989 /* When we find the end of a constant table, paste the new constant
4990 at the end. That is better than putting it in front because
4991 this way, we don't need extra alignment for adding a 4-byte-aligned
4992 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4993 else if (NONJUMP_INSN_P (from)
4994 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4995 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4996 return from;
4998 if (BARRIER_P (from))
5000 rtx_insn *next;
5002 found_barrier = from;
5004 /* If we are at the end of the function, or in front of an alignment
5005 instruction, we need not insert an extra alignment. We prefer
5006 this kind of barrier. */
5007 if (barrier_align (from) > 2)
5008 good_barrier = from;
5010 /* If we are at the end of a hot/cold block, dump the constants
5011 here. */
5012 next = NEXT_INSN (from);
5013 if (next
5014 && NOTE_P (next)
5015 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5016 break;
5019 if (broken_move (from))
5021 rtx pat, src, dst;
5022 machine_mode mode;
5024 pat = PATTERN (from);
5025 if (GET_CODE (pat) == PARALLEL)
5026 pat = XVECEXP (pat, 0, 0);
5027 src = SET_SRC (pat);
5028 dst = SET_DEST (pat);
5029 mode = GET_MODE (dst);
5031 /* A GOT pc relative setting comes in a pair of
5032 mova .L8,r0
5033 mov.l .L8,r12
5034 instructions (plus an add r0,r12).
5035 Remember if we see one without the other. */
5036 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5037 last_got = last_got ? NULL : from;
5038 else if (PIC_ADDR_P (src))
5039 last_got = last_got ? NULL : from;
5041 /* We must explicitly check the mode, because sometimes the
5042 front end will generate code to load unsigned constants into
5043 HImode targets without properly sign extending them. */
5044 if (mode == HImode
5045 || (mode == SImode && satisfies_constraint_I16 (src)
5046 && REGNO (dst) != FPUL_REG))
5048 found_hi += 2;
5049 /* We put the short constants before the long constants, so
5050 we must count the length of short constants in the range
5051 for the long constants. */
5052 /* ??? This isn't optimal, but is easy to do. */
5053 si_limit -= 2;
5055 else
5057 /* We dump DF/DI constants before SF/SI ones, because
5058 the limit is the same, but the alignment requirements
5059 are higher. We may waste up to 4 additional bytes
5060 for alignment, and the DF/DI constant may have
5061 another SF/SI constant placed before it. */
5062 while (si_align > 2 && found_si + si_align - 2 > count_si)
5063 si_align >>= 1;
5064 if (found_si > count_si)
5065 count_si = found_si;
5066 found_si += GET_MODE_SIZE (mode);
5067 if (num_mova)
5068 si_limit -= GET_MODE_SIZE (mode);
5072 if (mova_p (from))
5074 switch (untangle_mova (&num_mova, &mova, from))
5076 case 1:
5077 if (flag_pic)
5079 rtx src = SET_SRC (PATTERN (from));
5080 if (GET_CODE (src) == CONST
5081 && GET_CODE (XEXP (src, 0)) == UNSPEC
5082 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5083 last_symoff = from;
5085 break;
5086 case 0: return find_barrier (0, 0, mova);
5087 case 2:
5089 leading_mova = 0;
5090 barrier_before_mova
5091 = good_barrier ? good_barrier : found_barrier;
5093 default: break;
5095 if (found_si > count_si)
5096 count_si = found_si;
5098 else if (JUMP_TABLE_DATA_P (from)
5099 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5101 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5102 || (num_mova
5103 && (prev_nonnote_insn (from)
5104 == XEXP (MOVA_LABELREF (mova), 0))))
5105 num_mova--;
5106 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5108 /* We have just passed the barrier in front of the
5109 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5110 the ADDR_DIFF_VEC is accessed as data, just like our pool
5111 constants, this is a good opportunity to accommodate what
5112 we have gathered so far.
5113 If we waited any longer, we could end up at a barrier in
5114 front of code, which gives worse cache usage for separated
5115 instruction / data caches. */
5116 good_barrier = found_barrier;
5117 break;
5119 else
5121 rtx body = PATTERN (from);
5122 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5125 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5126 else if (JUMP_P (from)
5127 && ! TARGET_SH2
5128 && ! optimize_size)
5129 new_align = 4;
5131 /* There is a possibility that a bf is transformed into a bf/s by the
5132 delay slot scheduler. */
5133 if (JUMP_P (from)
5134 && get_attr_type (from) == TYPE_CBRANCH
5135 && ! sequence_insn_p (from))
5136 inc += 2;
5138 if (found_si)
5140 count_si += inc;
5141 if (new_align > si_align)
5143 si_limit -= (count_si - 1) & (new_align - si_align);
5144 si_align = new_align;
5146 count_si = (count_si + new_align - 1) & -new_align;
5148 if (found_hi)
5150 count_hi += inc;
5151 if (new_align > hi_align)
5153 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5154 hi_align = new_align;
5156 count_hi = (count_hi + new_align - 1) & -new_align;
5158 from = NEXT_INSN (from);
5161 if (num_mova)
5163 if (leading_mova)
5165 /* Try as we might, the leading mova is out of range. Change
5166 it into a load (which will become a pcload) and retry. */
5167 fixup_mova (mova);
5168 return find_barrier (0, 0, mova);
5170 else
5172 /* Insert the constant pool table before the mova instruction,
5173 to prevent the mova label reference from going out of range. */
5174 from = mova;
5175 good_barrier = found_barrier = barrier_before_mova;
5179 if (found_barrier)
5181 if (good_barrier && next_real_insn (found_barrier))
5182 found_barrier = good_barrier;
5184 else
5186 /* We didn't find a barrier in time to dump our stuff,
5187 so we'll make one. */
5188 rtx_code_label *label = gen_label_rtx ();
5190 /* Don't emit a constant table in the middle of insns for
5191 casesi_worker_2. This is a bit of overkill, but it is enough
5192 because casesi_worker_2 doesn't appear very frequently. */
5193 if (last_symoff)
5194 from = last_symoff;
5196 /* If we exceeded the range, then we must back up over the last
5197 instruction we looked at. Otherwise, we just need to undo the
5198 NEXT_INSN at the end of the loop. */
5199 if (PREV_INSN (from) != orig
5200 && (count_hi > hi_limit || count_si > si_limit))
5201 from = PREV_INSN (PREV_INSN (from));
5202 else
5203 from = PREV_INSN (from);
5205 /* Don't emit a constant table in the middle of global pointer setting,
5206 since that would move the addressing base GOT into another table.
5207 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5208 in the pool anyway, so just move up the whole constant pool.
5210 However, avoid doing so when the last single GOT mov is the starting
5211 insn itself. Going back past the start insn would create a negative
5212 offset, causing errors. */
5213 if (last_got && last_got != orig)
5214 from = PREV_INSN (last_got);
5216 /* Don't insert the constant pool table at the position which
5217 may be the landing pad. */
5218 if (flag_exceptions
5219 && CALL_P (from)
5220 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5221 from = PREV_INSN (from);
5223 /* Walk back to be just before any jump or label.
5224 Putting it before a label reduces the number of times the branch
5225 around the constant pool table will be hit. Putting it before
5226 a jump makes it more likely that the bra delay slot will be
5227 filled. */
5228 while (NOTE_P (from) || JUMP_P (from)
5229 || LABEL_P (from))
5230 from = PREV_INSN (from);
5232 /* Make sure we do not split between a call and its corresponding
5233 CALL_ARG_LOCATION note. */
5234 if (CALL_P (from))
5236 bool sibcall_p = SIBLING_CALL_P (from);
5238 rtx_insn *next = NEXT_INSN (from);
5239 if (next && NOTE_P (next)
5240 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5241 from = next;
5243 /* If FROM was a sibling call, then we know that control
5244 will not return. In fact, we were guaranteed to hit
5245 a barrier before another real insn.
5247 The jump around the constant pool is unnecessary. It
5248 costs space, but more importantly it confuses dwarf2cfi
5249 generation. */
5250 if (sibcall_p)
5251 return emit_barrier_after (from);
5254 from = emit_jump_insn_after (gen_jump (label), from);
5255 JUMP_LABEL (from) = label;
5256 LABEL_NUSES (label) = 1;
5257 found_barrier = emit_barrier_after (from);
5258 emit_label_after (label, found_barrier);
5261 return found_barrier;
5264 /* If the instruction INSN is implemented by a special function, and we can
5265 positively find the register that is used to call the sfunc, and this
5266 register is not used anywhere else in this instruction - except as the
5267 destination of a set, return this register; else, return 0. */
5268 rtx
5269 sfunc_uses_reg (rtx_insn *insn)
5271 int i;
5272 rtx pattern, part, reg_part, reg;
5274 if (!NONJUMP_INSN_P (insn))
5275 return NULL_RTX;
5276 pattern = PATTERN (insn);
5277 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5278 return NULL_RTX;
5280 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5282 part = XVECEXP (pattern, 0, i);
5283 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5284 reg_part = part;
5286 if (! reg_part)
5287 return NULL_RTX;
5288 reg = XEXP (reg_part, 0);
5289 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5291 part = XVECEXP (pattern, 0, i);
5292 if (part == reg_part || GET_CODE (part) == CLOBBER)
5293 continue;
5294 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5295 && REG_P (SET_DEST (part)))
5296 ? SET_SRC (part) : part)))
5297 return NULL_RTX;
5299 return reg;
5302 /* See if the only way in which INSN uses REG is by calling it, or by
5303 setting it while calling it. Set *SET to a SET rtx if the register
5304 is set by INSN. */
5305 static bool
5306 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5308 *set = NULL_RTX;
5310 rtx reg2 = sfunc_uses_reg (insn);
5311 if (reg2 && REGNO (reg2) == REGNO (reg))
5313 rtx pattern = single_set (insn);
5314 if (pattern
5315 && REG_P (SET_DEST (pattern))
5316 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5317 *set = pattern;
5318 return false;
5320 if (!CALL_P (insn))
5322 /* We don't use rtx_equal_p because we don't care if the mode is
5323 different. */
5324 rtx pattern = single_set (insn);
5325 if (pattern
5326 && REG_P (SET_DEST (pattern))
5327 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5329 rtx par, part;
5330 int i;
5332 *set = pattern;
5333 par = PATTERN (insn);
5334 if (GET_CODE (par) == PARALLEL)
5335 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5337 part = XVECEXP (par, 0, i);
5338 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5339 return true;
5341 return reg_mentioned_p (reg, SET_SRC (pattern));
5344 return true;
5347 rtx pattern = PATTERN (insn);
5349 if (GET_CODE (pattern) == PARALLEL)
5351 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5352 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5353 return true;
5354 pattern = XVECEXP (pattern, 0, 0);
5357 if (GET_CODE (pattern) == SET)
5359 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5361 /* We don't use rtx_equal_p, because we don't care if the
5362 mode is different. */
5363 if (!REG_P (SET_DEST (pattern))
5364 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5365 return true;
5367 *set = pattern;
5370 pattern = SET_SRC (pattern);
5373 if (GET_CODE (pattern) != CALL
5374 || !MEM_P (XEXP (pattern, 0))
5375 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5376 return true;
5378 return false;
5381 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5382 general registers. Bits 0..15 mean that the respective registers
5383 are used as inputs in the instruction. Bits 16..31 mean that the
5384 registers 0..15, respectively, are used as outputs, or are clobbered.
5385 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5387 regs_used (rtx x, int is_dest)
5389 enum rtx_code code;
5390 const char *fmt;
5391 int used = 0;
5393 if (! x)
5394 return used;
5395 code = GET_CODE (x);
5396 switch (code)
5398 case REG:
5399 if (REGNO (x) < 16)
5400 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5401 << (REGNO (x) + is_dest));
5402 return 0;
5403 case SUBREG:
5405 rtx y = SUBREG_REG (x);
5407 if (!REG_P (y))
5408 break;
5409 if (REGNO (y) < 16)
5410 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5411 << (REGNO (y) +
5412 subreg_regno_offset (REGNO (y),
5413 GET_MODE (y),
5414 SUBREG_BYTE (x),
5415 GET_MODE (x)) + is_dest));
5416 return 0;
5418 case SET:
5419 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5420 case RETURN:
5421 /* If there was a return value, it must have been indicated with USE. */
5422 return 0x00ffff00;
5423 case CLOBBER:
5424 is_dest = 1;
5425 break;
5426 case MEM:
5427 is_dest = 0;
5428 break;
5429 case CALL:
5430 used |= 0x00ff00f0;
5431 break;
5432 default:
5433 break;
5436 fmt = GET_RTX_FORMAT (code);
5438 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5440 if (fmt[i] == 'E')
5442 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5443 used |= regs_used (XVECEXP (x, i, j), is_dest);
5445 else if (fmt[i] == 'e')
5446 used |= regs_used (XEXP (x, i), is_dest);
5448 return used;
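/* Worked example of the mask layout described above (illustrative):
   for (set (reg:SI r1) (plus:SI (reg:SI r2) (reg:SI r3))) the result is
       inputs r2, r3:  (1 << 2) | (1 << 3)   = 0x0000000c
       output r1:      1 << (1 + 16)         = 0x00020000
       total                                 = 0x0002000c  */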
5451 /* Create an instruction that prevents redirection of a conditional branch
5452 to the destination of the JUMP with address ADDR.
5453 If the branch needs to be implemented as an indirect jump, try to find
5454 a scratch register for it.
5455 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5456 If any preceding insn that doesn't fit into a delay slot is good enough,
5457 pass 1. Pass 2 if a definite blocking insn is needed.
5458 -1 is used internally to avoid deep recursion.
5459 If a blocking instruction is made or recognized, return it. */
5460 static rtx_insn *
5461 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5463 int dead = 0;
5464 rtx_insn *prev = prev_nonnote_insn (jump);
5466 /* First, check if we already have an instruction that satisfies our need. */
5467 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5469 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5470 return prev;
5471 if (GET_CODE (PATTERN (prev)) == USE
5472 || GET_CODE (PATTERN (prev)) == CLOBBER
5473 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5474 prev = jump;
5475 else if ((need_block &= ~1) < 0)
5476 return prev;
5477 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5478 need_block = 0;
5480 if (GET_CODE (PATTERN (jump)) == RETURN)
5482 if (! need_block)
5483 return prev;
5484 /* Reorg even does nasty things with return insns that cause branches
5485 to go out of range - see find_end_label and callers. */
5486 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5488 /* We can't use JUMP_LABEL here because it might be undefined
5489 when not optimizing. */
5490 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5491 /* If the branch is out of range, try to find a scratch register for it. */
5492 if (optimize
5493 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5494 > 4092 + 4098))
5496 rtx_insn *scan;
5497 /* Don't look for the stack pointer as a scratch register,
5498 it would cause trouble if an interrupt occurred. */
5499 unsigned attempt = 0x7fff, used;
5500 int jump_left = flag_expensive_optimizations + 1;
5502 /* It is likely that the most recent eligible instruction is wanted for
5503 the delay slot. Therefore, find out which registers it uses, and
5504 try to avoid using them. */
5506 for (scan = jump; (scan = PREV_INSN (scan)); )
5508 if (scan->deleted ())
5509 continue;
5510 rtx_code code = GET_CODE (scan);
5511 if (code == CODE_LABEL || code == JUMP_INSN)
5512 break;
5513 if (code == INSN
5514 && GET_CODE (PATTERN (scan)) != USE
5515 && GET_CODE (PATTERN (scan)) != CLOBBER
5516 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5518 attempt &= ~regs_used (PATTERN (scan), 0);
5519 break;
5522 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5523 (scan = NEXT_INSN (scan)); )
5525 if (scan->deleted ())
5526 continue;
5527 rtx_code code = GET_CODE (scan);
5528 if (INSN_P (scan))
5530 used |= regs_used (PATTERN (scan), 0);
5531 if (code == CALL_INSN)
5532 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5533 dead |= (used >> 16) & ~used;
5534 if (dead & attempt)
5536 dead &= attempt;
5537 break;
5539 if (code == JUMP_INSN)
5541 if (jump_left-- && simplejump_p (scan))
5542 scan = JUMP_LABEL_AS_INSN (scan);
5543 else
5544 break;
5548 /* Mask out the stack pointer again, in case it was
5549 the only 'free' register we have found. */
5550 dead &= 0x7fff;
5552 /* If the immediate destination is still in range, check for possible
5553 threading with a jump beyond the delay slot insn.
5554 Don't check if we are called recursively; the jump has been or will be
5555 checked in a different invocation in that case. */
5557 else if (optimize && need_block >= 0)
5559 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
5560 next = next_active_insn (next);
5561 if (next && JUMP_P (next)
5562 && GET_CODE (PATTERN (next)) == SET
5563 && recog_memoized (next) == CODE_FOR_jump_compact)
5565 dest = JUMP_LABEL (next);
5566 if (dest
5567 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5568 > 4092 + 4098))
5569 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5573 if (dead)
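/* dead & -dead isolates the lowest set bit of the dead-register mask, so
   exact_log2 yields the number of the lowest-numbered register found to be
   dead; use that register as the scratch.  */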
5575 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5577 /* It would be nice if we could convert the jump into an indirect
5578 jump / far branch right now, and thus exposing all constituent
5579 instructions to further optimization. However, reorg uses
5580 simplejump_p to determine if there is an unconditional jump where
5581 it should try to schedule instructions from the target of the
5582 branch; simplejump_p fails for indirect jumps even if they have
5583 a JUMP_LABEL. */
5584 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5585 (reg, GEN_INT (unspec_bbr_uid++)),
5586 jump);
5587 /* ??? We would like this to have the scope of the jump, but that
5588 scope will change when a delay slot insn of an inner scope is added.
5589 Hence, after delay slot scheduling, we'll have to expect
5590 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5591 the jump. */
5593 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5594 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5595 return insn;
5597 else if (need_block)
5598 /* We can't use JUMP_LABEL here because it might be undefined
5599 when not optimizing. */
5600 return emit_insn_before (gen_block_branch_redirect
5601 (GEN_INT (unspec_bbr_uid++)),
5602 jump);
5603 return prev;
5606 #define CONDJUMP_MIN -252
5607 #define CONDJUMP_MAX 262
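/* CONDJUMP_MIN/MAX bound how far (in bytes, relative to the branch address)
   the target of a conditional branch may be before the branch has to be
   rerouted through a near label and, eventually, a far branch; the values
   are used as given here rather than re-derived from the instruction
   encoding.  */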
5608 struct far_branch
5610 /* A label (to be placed) in front of the jump
5611 that jumps to our ultimate destination. */
5612 rtx_insn *near_label;
5613 /* Where we are going to insert it if we cannot move the jump any farther,
5614 or the jump itself if we have picked up an existing jump. */
5615 rtx_insn *insert_place;
5616 /* The ultimate destination. */
5617 rtx_insn *far_label;
5618 struct far_branch *prev;
5619 /* If the branch has already been created, its address;
5620 else the address of its first prospective user. */
5621 int address;
5624 enum mdep_reorg_phase_e mdep_reorg_phase;
5626 static void
5627 gen_far_branch (struct far_branch *bp)
5629 rtx_insn *insn = bp->insert_place;
5630 rtx_jump_insn *jump;
5631 rtx_code_label *label = gen_label_rtx ();
5633 emit_label_after (label, insn);
5634 if (bp->far_label)
5636 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5637 LABEL_NUSES (bp->far_label)++;
5639 else
5640 jump = emit_jump_insn_after (gen_return (), insn);
5642 /* Emit a barrier so that reorg knows that any following instructions
5643 are not reachable via a fall-through path.
5644 But don't do this when not optimizing, since we wouldn't suppress the
5645 alignment for the barrier then, and could end up with out-of-range
5646 pc-relative loads. */
5647 if (optimize)
5648 emit_barrier_after (jump);
5649 emit_label_after (bp->near_label, insn);
5651 if (bp->far_label)
5652 JUMP_LABEL (jump) = bp->far_label;
5653 else
5655 rtx pat = PATTERN (jump);
5656 gcc_assert (ANY_RETURN_P (pat));
5657 JUMP_LABEL (jump) = pat;
5660 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5661 gcc_assert (ok);
5663 /* If we are branching around a jump (rather than a return), prevent
5664 reorg from using an insn from the jump target as the delay slot insn -
5665 when reorg did this, it pessimized code (we would rather hide the delay slot)
5666 and it could cause branches to go out of range. */
5667 if (bp->far_label)
5668 (emit_insn_after
5669 (gen_stuff_delay_slot
5670 (GEN_INT (unspec_bbr_uid++),
5671 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5672 insn));
5673 /* Prevent reorg from undoing our splits. */
5674 gen_block_redirect (jump, bp->address += 2, 2);
5677 /* Fix up ADDR_DIFF_VECs. */
5678 void
5679 fixup_addr_diff_vecs (rtx_insn *first)
5681 rtx_insn *insn;
5683 for (insn = first; insn; insn = NEXT_INSN (insn))
5685 rtx vec_lab, pat, prevpat, x, braf_label;
5686 rtx_insn *prev;
5688 if (! JUMP_TABLE_DATA_P (insn)
5689 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5690 continue;
5691 pat = PATTERN (insn);
5692 vec_lab = XEXP (XEXP (pat, 0), 0);
5694 /* Search the matching casesi_jump_2. */
5695 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5697 if (!JUMP_P (prev))
5698 continue;
5699 prevpat = PATTERN (prev);
5700 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5701 continue;
5702 x = XVECEXP (prevpat, 0, 1);
5703 if (GET_CODE (x) != USE)
5704 continue;
5705 x = XEXP (x, 0);
5706 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5707 break;
5709 /* FIXME: This is a bug in the optimizer, but it seems harmless
5710 to just avoid panicking. */
5711 if (!prev)
5712 continue;
5714 /* Emit the reference label of the braf where it belongs, right after
5715 the casesi_jump_2 (i.e. braf). */
5716 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5717 emit_label_after (braf_label, prev);
5719 /* Fix up the ADDR_DIFF_VEC to be relative
5720 to the reference address of the braf. */
5721 XEXP (XEXP (pat, 0), 0) = braf_label;
5725 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5726 a barrier. Return the base 2 logarithm of the desired alignment. */
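/* For example, a return value of 2 requests 1 << 2 = 4 byte alignment,
   1 requests 2-byte alignment, and 0 requests no extra alignment at all.  */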
5727 int
5728 barrier_align (rtx_insn *barrier_or_label)
5730 if (! barrier_or_label)
5731 return 0;
5733 if (LABEL_P (barrier_or_label)
5734 && NEXT_INSN (barrier_or_label)
5735 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5736 return 2;
5738 if (BARRIER_P (barrier_or_label)
5739 && PREV_INSN (barrier_or_label)
5740 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5742 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5743 /* If this is a very small table, we want to keep the alignment after
5744 the table to the minimum for proper code alignment. */
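/* Purely as an illustration (the actual CACHE_LOG value is defined
   elsewhere): if CACHE_LOG were 5, tables of at most 1 << (5 - 2) = 8 bytes
   would keep only the minimal alignment (a return value of 1, i.e. 2 bytes)
   after the table.  */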
5745 return ((optimize_size
5746 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5747 <= (unsigned) 1 << (CACHE_LOG - 2)))
5748 ? 1 : align_jumps_log);
5751 rtx_insn *next = next_active_insn (barrier_or_label);
5753 if (! next)
5754 return 0;
5756 rtx pat = PATTERN (next);
5758 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5759 /* This is a barrier in front of a constant table. */
5760 return 0;
5762 if (optimize_size)
5763 return 0;
5765 if (! TARGET_SH2 || ! optimize)
5766 return align_jumps_log;
5768 /* When fixing up pcloads, a constant table might be inserted just before
5769 the basic block that ends with the barrier. Thus, we can't trust the
5770 instruction lengths before that. */
5771 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5773 /* Check if there is an immediately preceding branch to the insn beyond
5774 the barrier. We must weigh the cost of discarding useful information
5775 from the current cache line when executing this branch and there is
5776 an alignment, against that of fetching unneeded insns in front of the
5777 branch target when there is no alignment. */
5779 /* There are two delay_slot cases to consider. One is the simple case
5780 where the preceding branch is to the insn beyond the barrier (simple
5781 delay slot filling), and the other is where the preceding branch has
5782 a delay slot that is a duplicate of the insn after the barrier
5783 (fill_eager_delay_slots) and the branch is to the insn after the insn
5784 after the barrier. */
5786 int slot, credit;
5787 bool jump_to_next = false;
5789 /* Skip to the insn before the JUMP_INSN before the barrier under
5790 investigation. */
5791 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5793 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5794 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5795 prev = prev_real_insn (prev))
5797 jump_to_next = false;
5798 if (GET_CODE (PATTERN (prev)) == USE
5799 || GET_CODE (PATTERN (prev)) == CLOBBER)
5800 continue;
5801 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5803 prev = prev_seq->insn (1);
5804 if (INSN_UID (prev) == INSN_UID (next))
5806 /* Delay slot was filled with insn at jump target. */
5807 jump_to_next = true;
5808 continue;
5812 if (slot
5813 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5814 slot = 0;
5815 credit -= get_attr_length (prev);
5817 if (prev && jump_to_label_p (prev))
5819 rtx_insn *x;
5820 if (jump_to_next
5821 || next_real_insn (JUMP_LABEL (prev)) == next
5822 /* If relax_delay_slots() decides NEXT was redundant
5823 with some previous instruction, it will have
5824 redirected PREV's jump to the following insn. */
5825 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5826 /* There is no upper bound on redundant instructions
5827 that might have been skipped, but we must not put an
5828 alignment where none had been before. */
5829 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5830 (INSN_P (x)
5831 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5832 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5833 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5835 rtx pat = PATTERN (prev);
5836 if (GET_CODE (pat) == PARALLEL)
5837 pat = XVECEXP (pat, 0, 0);
5838 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5839 return 0;
5844 return align_jumps_log;
5847 /* If we are inside a phony loop, almost any kind of label can turn up as the
5848 first one in the loop. Aligning a braf label causes incorrect switch
5849 destination addresses; we can detect braf labels because they are
5850 followed by a BARRIER.
5851 Applying loop alignment to small constant or switch tables is a waste
5852 of space, so we suppress this too. */
5853 int
5854 sh_loop_align (rtx_insn *label)
5856 rtx_insn *next = label;
5858 if (! optimize || optimize_size)
5859 return 0;
5861 do
5862 next = next_nonnote_insn (next);
5863 while (next && LABEL_P (next));
5865 if (! next
5866 || ! INSN_P (next)
5867 || recog_memoized (next) == CODE_FOR_consttable_2)
5868 return 0;
5870 return align_loops_log;
5873 /* Do a final pass over the function, just before delayed branch
5874 scheduling. */
5875 static void
5876 sh_reorg (void)
5878 rtx_insn *first, *insn, *mova = NULL;
5879 int num_mova;
5880 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5881 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5883 first = get_insns ();
5884 max_labelno_before_reorg = max_label_num ();
5886 /* We must split call insns before introducing `mova's. If we're
5887 optimizing, they'll have already been split. Otherwise, make
5888 sure we don't split them too late. */
5889 if (! optimize)
5890 split_all_insns_noflow ();
5892 /* If relaxing, generate pseudo-ops to associate function calls with
5893 the symbols they call. It does no harm not to generate these
5894 pseudo-ops. However, when we can generate them, it enables the
5895 linker to potentially relax the jsr to a bsr, and eliminate the
5896 register load and, possibly, the constant pool entry. */
5898 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5899 if (TARGET_RELAX)
5901 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5902 own purposes. This works because none of the remaining passes
5903 need to look at them.
5905 ??? But it may break in the future. We should use a machine
5906 dependent REG_NOTE, or some other approach entirely. */
5907 for (insn = first; insn; insn = NEXT_INSN (insn))
5909 if (INSN_P (insn))
5911 rtx note;
5913 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5914 NULL_RTX)) != 0)
5915 remove_note (insn, note);
5919 for (insn = first; insn; insn = NEXT_INSN (insn))
5921 rtx pattern, reg, set, dies;
5922 rtx_code_label *label;
5923 rtx_insn *link, *scan;
5924 int rescan = 0, foundinsn = 0;
5926 if (CALL_P (insn))
5928 pattern = PATTERN (insn);
5930 if (GET_CODE (pattern) == PARALLEL)
5931 pattern = XVECEXP (pattern, 0, 0);
5932 if (GET_CODE (pattern) == SET)
5933 pattern = SET_SRC (pattern);
5935 if (GET_CODE (pattern) != CALL
5936 || !MEM_P (XEXP (pattern, 0)))
5937 continue;
5939 reg = XEXP (XEXP (pattern, 0), 0);
5941 else
5943 reg = sfunc_uses_reg (insn);
5944 if (! reg)
5945 continue;
5948 if (!REG_P (reg))
5949 continue;
5951 /* Try scanning backward to find where the register is set. */
5952 link = NULL;
5953 for (scan = PREV_INSN (insn);
5954 scan && !LABEL_P (scan);
5955 scan = PREV_INSN (scan))
5957 if (! INSN_P (scan))
5958 continue;
5960 if (! reg_mentioned_p (reg, scan))
5961 continue;
5963 if (noncall_uses_reg (reg, scan, &set))
5964 break;
5966 if (set)
5968 link = scan;
5969 break;
5973 if (! link)
5974 continue;
5976 /* The register is set at LINK. */
5978 /* We can only optimize the function call if the register is
5979 being set to a symbol. In theory, we could sometimes
5980 optimize calls to a constant location, but the assembler
5981 and linker do not support that at present. */
5982 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5983 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5984 continue;
5986 /* Scan forward from LINK to the place where REG dies, and
5987 make sure that the only insns which use REG are
5988 themselves function calls. */
5990 /* ??? This doesn't work for call targets that were allocated
5991 by reload, since there may not be a REG_DEAD note for the
5992 register. */
5994 dies = NULL_RTX;
5995 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5997 rtx scanset;
5999 /* Don't try to trace forward past a CODE_LABEL if we haven't
6000 seen INSN yet. Ordinarily, we will only find the setting insn
6001 if it is in the same basic block. However,
6002 cross-jumping can insert code labels in between the load and
6003 the call, and can result in situations where a single call
6004 insn may have two targets depending on where we came from. */
6006 if (LABEL_P (scan) && ! foundinsn)
6007 break;
6009 if (! INSN_P (scan))
6010 continue;
6012 /* Don't try to trace forward past a JUMP. To optimize
6013 safely, we would have to check that all the
6014 instructions at the jump destination did not use REG. */
6016 if (JUMP_P (scan))
6017 break;
6019 if (! reg_mentioned_p (reg, scan))
6020 continue;
6022 if (noncall_uses_reg (reg, scan, &scanset))
6023 break;
6025 if (scan == insn)
6026 foundinsn = 1;
6028 if (scan != insn
6029 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6031 /* There is a function call to this register other
6032 than the one we are checking. If we optimize
6033 this call, we need to rescan again below. */
6034 rescan = 1;
6037 /* ??? We shouldn't have to worry about SCANSET here.
6038 We should just be able to check for a REG_DEAD note
6039 on a function call. However, the REG_DEAD notes are
6040 apparently not dependable around libcalls; c-torture
6041 execute/920501-2 is a test case. If SCANSET is set,
6042 then this insn sets the register, so it must have
6043 died earlier. Unfortunately, this will only handle
6044 the cases in which the register is, in fact, set in a
6045 later insn. */
6047 /* ??? We shouldn't have to use FOUNDINSN here.
6048 This dates back to when we used LOG_LINKS to find
6049 the most recent insn which sets the register. */
6051 if (foundinsn
6052 && (scanset
6053 || find_reg_note (scan, REG_DEAD, reg)))
6055 dies = scan;
6056 break;
6060 if (! dies)
6062 /* Either there was a branch, or some insn used REG
6063 other than as a function call address. */
6064 continue;
6067 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6068 on the insn which sets the register, and on each call insn
6069 which uses the register. In final_prescan_insn we look for
6070 the REG_LABEL_OPERAND notes, and output the appropriate label
6071 or pseudo-op. */
6073 label = gen_label_rtx ();
6074 add_reg_note (link, REG_LABEL_OPERAND, label);
6075 add_reg_note (insn, REG_LABEL_OPERAND, label);
6076 if (rescan)
6078 scan = link;
6081 rtx reg2;
6083 scan = NEXT_INSN (scan);
6084 if (scan != insn
6085 && ((CALL_P (scan)
6086 && reg_mentioned_p (reg, scan))
6087 || ((reg2 = sfunc_uses_reg (scan))
6088 && REGNO (reg2) == REGNO (reg))))
6089 add_reg_note (scan, REG_LABEL_OPERAND, label);
6091 while (scan != dies);
6096 if (TARGET_SH2)
6097 fixup_addr_diff_vecs (first);
6099 if (optimize)
6101 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6102 shorten_branches (first);
6105 /* Scan the function looking for move instructions which have to be
6106 changed to pc-relative loads and insert the literal tables. */
6107 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6108 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6110 if (mova_p (insn))
6112 /* ??? basic block reordering can move a switch table dispatch
6113 below the switch table. Check if that has happened.
6114 We only have the addresses available when optimizing; but then,
6115 this check shouldn't be needed when not optimizing. */
6116 if (!untangle_mova (&num_mova, &mova, insn))
6118 insn = mova;
6119 num_mova = 0;
6122 else if (JUMP_TABLE_DATA_P (insn)
6123 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6124 && num_mova
6125 /* ??? loop invariant motion can also move a mova out of a
6126 loop. Since loop does this code motion anyway, maybe we
6127 should wrap UNSPEC_MOVA into a CONST, so that reload can
6128 move it back. */
6129 && ((num_mova > 1
6130 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6131 || (prev_nonnote_insn (insn)
6132 == XEXP (MOVA_LABELREF (mova), 0))))
6134 rtx_insn *scan;
6135 int total;
6137 num_mova--;
6139 /* Some code might have been inserted between the mova and
6140 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6141 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6142 total += get_attr_length (scan);
6144 /* The range of mova is 1020; add 4 because the pc counts from the address
6145 of the second instruction after this one, and subtract 2 in case the pc
6146 is 2-byte aligned: 1020 + 4 - 2 = 1022. Possible alignment needed for the
6147 ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
6148 if (total > 1022)
6150 /* Change the mova into a load, and restart scanning
6151 there. broken_move will then return true for mova. */
6152 fixup_mova (mova);
6153 insn = mova;
6156 if (broken_move (insn)
6157 || (NONJUMP_INSN_P (insn)
6158 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6160 rtx_insn *scan;
6161 /* Scan ahead looking for a barrier to stick the constant table
6162 behind. */
6163 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6164 rtx_insn *last_float_move = NULL;
6165 rtx last_float = 0, *last_float_addr = NULL;
6166 int need_aligned_label = 0;
6168 if (num_mova && ! mova_p (mova))
6170 /* find_barrier had to change the first mova into a
6171 pcload; thus, we have to start with this new pcload. */
6172 insn = mova;
6173 num_mova = 0;
6175 /* Now find all the moves between the points and modify them. */
6176 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6178 if (LABEL_P (scan))
6179 last_float = 0;
6180 if (NONJUMP_INSN_P (scan)
6181 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6182 need_aligned_label = 1;
6183 if (broken_move (scan))
6185 rtx *patp = &PATTERN (scan), pat = *patp;
6186 rtx src, dst;
6187 rtx lab;
6188 rtx newsrc;
6189 machine_mode mode;
6191 if (GET_CODE (pat) == PARALLEL)
6192 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6193 src = SET_SRC (pat);
6194 dst = SET_DEST (pat);
6195 mode = GET_MODE (dst);
6197 if (mode == SImode && satisfies_constraint_I16 (src)
6198 && REGNO (dst) != FPUL_REG)
6200 int offset = 0;
6202 mode = HImode;
6203 while (GET_CODE (dst) == SUBREG)
6205 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6206 GET_MODE (SUBREG_REG (dst)),
6207 SUBREG_BYTE (dst),
6208 GET_MODE (dst));
6209 dst = SUBREG_REG (dst);
6211 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6213 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6215 /* This must be an insn that clobbers r0. */
6216 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6217 XVECLEN (PATTERN (scan), 0)
6218 - 1);
6219 rtx clobber = *clobberp;
6221 gcc_assert (GET_CODE (clobber) == CLOBBER
6222 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6224 if (last_float
6225 && reg_set_between_p (r0_rtx, last_float_move, scan))
6226 last_float = 0;
6227 lab = add_constant (src, mode, last_float);
6228 if (lab)
6229 emit_insn_before (gen_mova (lab), scan);
6230 else
6232 /* There will be a REG_UNUSED note for r0 on
6233 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6234 otherwise reorg:mark_target_live_regs will not
6235 consider r0 to be used, and we would end up with a delay
6236 slot insn in front of SCAN that clobbers r0. */
6237 rtx note
6238 = find_regno_note (last_float_move, REG_UNUSED, 0);
6240 /* If we are not optimizing, then there may not be
6241 a note. */
6242 if (note)
6243 PUT_REG_NOTE_KIND (note, REG_INC);
6245 *last_float_addr = r0_inc_rtx;
6247 last_float_move = scan;
6248 last_float = src;
6249 newsrc = gen_const_mem (mode,
6250 (((TARGET_SH4 && ! TARGET_FMOVD)
6251 || REGNO (dst) == FPUL_REG)
6252 ? r0_inc_rtx
6253 : r0_rtx));
6254 last_float_addr = &XEXP (newsrc, 0);
6256 /* Remove the clobber of r0. */
6257 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6258 gen_rtx_SCRATCH (Pmode));
6260 /* This is a mova needing a label. Create it. */
6261 else if (GET_CODE (src) == UNSPEC
6262 && XINT (src, 1) == UNSPEC_MOVA
6263 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6265 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6266 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6267 newsrc = gen_rtx_UNSPEC (SImode,
6268 gen_rtvec (1, newsrc),
6269 UNSPEC_MOVA);
6271 else if (GET_CODE (src) == UNSPEC_VOLATILE
6272 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6274 newsrc = XVECEXP (src, 0, 0);
6275 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6276 INSN_CODE (scan) = -1;
6277 continue;
6279 else
6281 lab = add_constant (src, mode, 0);
6282 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6283 newsrc = gen_const_mem (mode, newsrc);
6285 *patp = gen_rtx_SET (dst, newsrc);
6286 INSN_CODE (scan) = -1;
6289 dump_table (need_aligned_label ? insn : 0, barrier);
6290 insn = barrier;
6293 label_ref_list_d_pool.release ();
6294 for (insn = first; insn; insn = NEXT_INSN (insn))
6295 PUT_MODE (insn, VOIDmode);
6297 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6298 INSN_ADDRESSES_FREE ();
6299 split_branches (first);
6301 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6302 also has an effect on the register that holds the address of the sfunc.
6303 Insert an extra dummy insn in front of each sfunc that pretends to
6304 use this register. */
6305 if (flag_delayed_branch)
6307 for (insn = first; insn; insn = NEXT_INSN (insn))
6309 rtx reg = sfunc_uses_reg (insn);
6311 if (! reg)
6312 continue;
6313 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6316 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6319 /* Return the UID of the insn that follows the specified label. */
6321 get_dest_uid (rtx label, int max_uid)
6323 rtx_insn *dest = next_real_insn (label);
6325 if (! dest)
6326 /* This can happen for an undefined label. */
6327 return 0;
6328 int dest_uid = INSN_UID (dest);
6329 /* If this is a newly created branch redirection blocking instruction,
6330 we cannot index the branch_uid or insn_addresses arrays with its
6331 uid. But then, we won't need to, because the actual destination is
6332 the following branch. */
6333 while (dest_uid >= max_uid)
6335 dest = NEXT_INSN (dest);
6336 dest_uid = INSN_UID (dest);
6338 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6339 return 0;
6340 return dest_uid;
6343 /* Split condbranches that are out of range. Also add clobbers for
6344 scratch registers that are needed in far jumps.
6345 We do this before delay slot scheduling, so that it can take our
6346 newly created instructions into account. It also allows us to
6347 find branches with common targets more easily. */
6348 static void
6349 split_branches (rtx_insn *first)
6351 rtx_insn *insn;
6352 struct far_branch **uid_branch, *far_branch_list = 0;
6353 int max_uid = get_max_uid ();
6354 int ok;
6356 /* Find out which branches are out of range. */
6357 shorten_branches (first);
6359 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6360 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6362 for (insn = first; insn; insn = NEXT_INSN (insn))
6363 if (! INSN_P (insn))
6364 continue;
6365 else if (insn->deleted ())
6367 /* Shorten_branches would split this instruction again,
6368 so transform it into a note. */
6369 SET_INSN_DELETED (insn);
6371 else if (JUMP_P (insn))
6373 enum attr_type type = get_attr_type (insn);
6374 if (type == TYPE_CBRANCH)
6376 rtx_insn *next, *beyond;
6378 if (get_attr_length (insn) > 4)
6380 rtx src = SET_SRC (PATTERN (insn));
6381 rtx olabel = XEXP (XEXP (src, 1), 0);
6382 int addr = INSN_ADDRESSES (INSN_UID (insn));
6383 rtx_insn *label = 0;
6384 int dest_uid = get_dest_uid (olabel, max_uid);
6385 struct far_branch *bp = uid_branch[dest_uid];
6387 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6388 the label if the LABEL_NUSES count drops to zero. There is
6389 always a jump_optimize pass that sets these values, but it
6390 proceeds to delete unreferenced code, and then if not
6391 optimizing, to un-delete the deleted instructions, thus
6392 leaving labels with use counts that are too low. */
6393 if (! optimize)
6395 JUMP_LABEL (insn) = olabel;
6396 LABEL_NUSES (olabel)++;
6398 if (! bp)
6400 bp = (struct far_branch *) alloca (sizeof *bp);
6401 uid_branch[dest_uid] = bp;
6402 bp->prev = far_branch_list;
6403 far_branch_list = bp;
6404 bp->far_label = as_a <rtx_insn *> (
6405 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6406 0));
6407 LABEL_NUSES (bp->far_label)++;
6409 else
6411 label = bp->near_label;
6412 if (! label && bp->address - addr >= CONDJUMP_MIN)
6414 rtx_insn *block = bp->insert_place;
6416 if (GET_CODE (PATTERN (block)) == RETURN)
6417 block = PREV_INSN (block);
6418 else
6419 block = gen_block_redirect (block,
6420 bp->address, 2);
6421 label = emit_label_after (gen_label_rtx (),
6422 PREV_INSN (block));
6423 bp->near_label = label;
6425 else if (label && ! NEXT_INSN (label))
6427 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6428 bp->insert_place = insn;
6429 else
6430 gen_far_branch (bp);
6433 if (! label
6434 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6436 bp->near_label = label = gen_label_rtx ();
6437 bp->insert_place = insn;
6438 bp->address = addr;
6440 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6441 gcc_assert (ok);
6443 else
6445 /* get_attr_length (insn) == 2 */
6446 /* Check if we have a pattern where reorg wants to redirect
6447 the branch to a label from an unconditional branch that
6448 is too far away. */
6449 /* We can't use JUMP_LABEL here because it might be undefined
6450 when not optimizing. */
6451 /* A syntax error might cause beyond to be NULL_RTX. */
6452 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6453 beyond = next_active_insn (as_a<rtx_insn *> (temp));
6455 if (beyond
6456 && (JUMP_P (beyond)
6457 || ((beyond = next_active_insn (beyond))
6458 && JUMP_P (beyond)))
6459 && GET_CODE (PATTERN (beyond)) == SET
6460 && recog_memoized (beyond) == CODE_FOR_jump_compact
6461 && ((INSN_ADDRESSES
6462 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6463 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6464 > 252 + 258 + 2))
6465 gen_block_redirect (beyond,
6466 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6469 next = next_active_insn (insn);
6471 if (next
6472 && (JUMP_P (next)
6473 || ((next = next_active_insn (next))
6474 && JUMP_P (next)))
6475 && GET_CODE (PATTERN (next)) == SET
6476 && recog_memoized (next) == CODE_FOR_jump_compact
6477 && ((INSN_ADDRESSES
6478 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6479 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6480 > 252 + 258 + 2))
6481 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6483 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6485 int addr = INSN_ADDRESSES (INSN_UID (insn));
6486 rtx_insn *far_label = 0;
6487 int dest_uid = 0;
6488 struct far_branch *bp;
6490 if (type == TYPE_JUMP)
6492 if (CROSSING_JUMP_P (insn))
6494 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6495 insn);
6496 continue;
6499 far_label = as_a <rtx_insn *> (
6500 XEXP (SET_SRC (PATTERN (insn)), 0));
6501 dest_uid = get_dest_uid (far_label, max_uid);
6502 if (! dest_uid)
6504 /* Parse errors can lead to labels outside
6505 the insn stream. */
6506 if (! NEXT_INSN (far_label))
6507 continue;
6509 if (! optimize)
6511 JUMP_LABEL (insn) = far_label;
6512 LABEL_NUSES (far_label)++;
6514 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6515 far_label = 0;
6518 bp = uid_branch[dest_uid];
6519 if (! bp)
6521 bp = (struct far_branch *) alloca (sizeof *bp);
6522 uid_branch[dest_uid] = bp;
6523 bp->prev = far_branch_list;
6524 far_branch_list = bp;
6525 bp->near_label = 0;
6526 bp->far_label = far_label;
6527 if (far_label)
6528 LABEL_NUSES (far_label)++;
6530 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6531 if (addr - bp->address <= CONDJUMP_MAX)
6532 emit_label_after (bp->near_label, PREV_INSN (insn));
6533 else
6535 gen_far_branch (bp);
6536 bp->near_label = 0;
6538 else
6539 bp->near_label = 0;
6540 bp->address = addr;
6541 bp->insert_place = insn;
6542 if (! far_label)
6543 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6544 else
6545 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6548 /* Generate all pending far branches,
6549 and free our references to the far labels. */
6550 while (far_branch_list)
6552 if (far_branch_list->near_label
6553 && ! NEXT_INSN (far_branch_list->near_label))
6554 gen_far_branch (far_branch_list);
6555 if (optimize
6556 && far_branch_list->far_label
6557 && ! --LABEL_NUSES (far_branch_list->far_label))
6558 delete_insn (far_branch_list->far_label);
6559 far_branch_list = far_branch_list->prev;
6562 /* Instruction length information is no longer valid due to the new
6563 instructions that have been generated. */
6564 init_insn_lengths ();
6567 /* Dump out instruction addresses, which is useful for debugging the
6568 constant pool table stuff.
6570 If relaxing, output the label and pseudo-ops used to link together
6571 calls and the instruction which set the registers.
6573 ??? The addresses printed by this routine for insns are nonsense for
6574 insns which are inside of a sequence where none of the inner insns have
6575 variable length. This is because the second pass of shorten_branches
6576 does not bother to update them. */
6577 void
6578 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6579 int noperands ATTRIBUTE_UNUSED)
6581 if (TARGET_DUMPISIZE)
6582 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6584 if (TARGET_RELAX)
6586 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6588 rtx pattern = PATTERN (insn);
6589 if (GET_CODE (pattern) == PARALLEL)
6590 pattern = XVECEXP (pattern, 0, 0);
6591 switch (GET_CODE (pattern))
6593 case SET:
6594 if (GET_CODE (SET_SRC (pattern)) != CALL
6595 && get_attr_type (insn) != TYPE_SFUNC)
6597 targetm.asm_out.internal_label
6598 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6599 break;
6601 /* FALLTHROUGH */
6602 case CALL:
6603 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6604 CODE_LABEL_NUMBER (XEXP (note, 0)));
6605 break;
6607 default:
6608 gcc_unreachable ();
6614 /* Dump out any constants accumulated in the final pass. These will
6615 only be labels. */
6616 const char *
6617 output_jump_label_table (void)
6619 if (pool_size)
6621 fprintf (asm_out_file, "\t.align 2\n");
6622 for (int i = 0; i < pool_size; i++)
6624 pool_node *p = &pool_vector[i];
6626 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6627 CODE_LABEL_NUMBER (p->label));
6628 output_asm_insn (".long %O0", &p->value);
6630 pool_size = 0;
6633 return "";
6636 /* A full frame looks like:
6638 arg-5
6639 arg-4
6640 [ if current_function_anonymous_args
6641 arg-3
6642 arg-2
6643 arg-1
6644 arg-0 ]
6645 saved-fp
6646 saved-r10
6647 saved-r11
6648 saved-r12
6649 saved-pr
6650 local-n
6652 local-1
6653 local-0 <- fp points here.
6655 Number of bytes pushed for anonymous args, used to pass information
6656 between expand_prologue and expand_epilogue.
6658 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6659 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6660 for an epilogue and a negative value means that it's for a sibcall
6661 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6662 all the registers that are about to be restored, and hence dead. */
6663 static void
6664 output_stack_adjust (int size, rtx reg, int epilogue_p,
6665 HARD_REG_SET *live_regs_mask, bool frame_p)
6667 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6668 if (size)
6670 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6672 /* This test is bogus, as output_stack_adjust is used to re-align the
6673 stack. */
6674 #if 0
6675 gcc_assert (!(size % align));
6676 #endif
6678 if (CONST_OK_FOR_ADD (size))
6679 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6680 /* Try to do it with two partial adjustments; however, we must make
6681 sure that the stack is properly aligned at all times, in case
6682 an interrupt occurs between the two partial adjustments. */
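/* A sketch of the idea, assuming CONST_OK_FOR_ADD accepts signed 8-bit
   constants and align == 4: size == 200 does not fit in a single add, but
   200 / 2 & -4 == 100 and 200 - 100 == 100 each do, and the stack stays
   4-byte aligned after the first partial add.  */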
6683 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6684 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6686 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6687 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6689 else
6691 rtx const_reg;
6692 rtx insn;
6693 int temp = epilogue_p ? 7 : 1;
6694 int i;
6696 /* If TEMP is invalid, we could temporarily save a general
6697 register to MACL. However, there is currently no need
6698 to handle this case, so just die when we see it. */
6699 if (epilogue_p < 0
6700 || current_function_interrupt
6701 || ! call_really_used_regs[temp] || fixed_regs[temp])
6702 temp = -1;
6703 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6705 HARD_REG_SET temps;
6706 COPY_HARD_REG_SET (temps, call_used_reg_set);
6707 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6708 if (epilogue_p > 0)
6710 int nreg = 0;
6711 if (crtl->return_rtx)
6713 machine_mode mode;
6714 mode = GET_MODE (crtl->return_rtx);
6715 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6716 nreg = hard_regno_nregs (FIRST_RET_REG, mode);
6718 for (i = 0; i < nreg; i++)
6719 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6720 if (crtl->calls_eh_return)
6722 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6723 for (i = 0; i <= 3; i++)
6724 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6727 if (epilogue_p <= 0)
6729 for (i = FIRST_PARM_REG;
6730 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6731 CLEAR_HARD_REG_BIT (temps, i);
6732 if (cfun->static_chain_decl != NULL)
6733 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6735 temp = scavenge_reg (&temps);
6737 if (temp < 0 && live_regs_mask)
6739 HARD_REG_SET temps;
6741 COPY_HARD_REG_SET (temps, *live_regs_mask);
6742 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6743 temp = scavenge_reg (&temps);
6745 if (temp < 0)
6747 rtx adj_reg, tmp_reg, mem;
6749 /* If we reached here, the most likely case is the (sibcall)
6750 epilogue. Emit a special push/pop sequence for such a case as
6751 a last resort. This looks lengthy, but it is not a problem
6752 because it should be very rare. */
6753 gcc_assert (epilogue_p);
6755 /* ??? There is still the slight possibility that r4 or
6756 r5 have been reserved as fixed registers or assigned
6757 as global registers, and they change during an
6758 interrupt. There are possible ways to handle this:
6760 - If we are adjusting the frame pointer (r14), we can do
6761 with a single temp register and an ordinary push / pop
6762 on the stack.
6763 - Grab any call-used or call-saved registers (i.e. not
6764 fixed or globals) for the temps we need. We might
6765 also grab r14 if we are adjusting the stack pointer.
6766 If we can't find enough available registers, issue
6767 a diagnostic and die - the user must have reserved
6768 way too many registers.
6769 But since all this is rather unlikely to happen and
6770 would require extra testing, we just die if r4 / r5
6771 are not available. */
6772 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6773 && !global_regs[4] && !global_regs[5]);
6775 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6776 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6777 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6778 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6779 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6780 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6781 emit_move_insn (mem, tmp_reg);
6782 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6783 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6784 emit_move_insn (mem, tmp_reg);
6785 emit_move_insn (reg, adj_reg);
6786 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6787 emit_move_insn (adj_reg, mem);
6788 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6789 emit_move_insn (tmp_reg, mem);
6790 /* Tell flow the insns that pop r4/r5 aren't dead. */
6791 emit_use (tmp_reg);
6792 emit_use (adj_reg);
6793 return;
6795 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6797 /* If SIZE is negative, subtract the positive value.
6798 This sometimes allows a constant pool entry to be shared
6799 between prologue and epilogue code. */
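/* For instance, a prologue adjustment by -256 becomes "load 256 into the
   temporary; subtract it", and the matching epilogue adjustment by +256
   becomes "load 256; add it", so both can reuse one constant pool entry for
   256 (an illustrative sketch, not literal assembler output).  */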
6800 if (size < 0)
6802 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6803 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6805 else
6807 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6808 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6810 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6811 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6812 GEN_INT (size))));
6817 /* Emit the specified insn and mark it as frame related. */
6818 static rtx_insn *
6819 emit_frame_insn (rtx x)
6821 rtx_insn *insn = emit_insn (x);
6822 RTX_FRAME_RELATED_P (insn) = 1;
6823 return insn;
6826 /* Output RTL to push register RN onto the stack. */
6827 static rtx
6828 push (int rn)
6830 rtx x;
6831 if (rn == FPUL_REG)
6832 x = gen_push_fpul ();
6833 else if (rn == FPSCR_REG)
6834 x = gen_push_fpscr ();
6835 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6836 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
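/* In double mode an even/odd FP register pair is pushed as a single DFmode
   value, so odd-numbered FP registers are skipped here; they are covered by
   the push of the preceding even-numbered register.  */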
6838 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6839 return NULL_RTX;
6840 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6842 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6843 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6844 else
6845 x = gen_push (gen_rtx_REG (SImode, rn));
6847 x = emit_frame_insn (x);
6848 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6849 return x;
6852 /* Output RTL to pop register RN from the stack. */
6853 static void
6854 pop (int rn)
6856 rtx x, sp_reg, reg;
6857 if (rn == FPUL_REG)
6858 x = gen_pop_fpul ();
6859 else if (rn == FPSCR_REG)
6860 x = gen_pop_fpscr ();
6861 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6862 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6864 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6865 return;
6866 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6868 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6869 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6870 else
6871 x = gen_pop (gen_rtx_REG (SImode, rn));
6873 x = emit_insn (x);
6875 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6876 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6877 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6878 : SET_DEST (PATTERN (x)));
6879 add_reg_note (x, REG_CFA_RESTORE, reg);
6880 add_reg_note (x, REG_CFA_ADJUST_CFA,
6881 gen_rtx_SET (sp_reg,
6882 plus_constant (SImode, sp_reg,
6883 GET_MODE_SIZE (GET_MODE (reg)))));
6884 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6885 RTX_FRAME_RELATED_P (x) = 1;
6888 /* Generate code to push the regs specified in the mask. */
6889 static void
6890 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6892 bool skip_fpscr = false;
6894 /* Push PR last; this gives better latencies after the prologue, and
6895 candidates for the return delay slot when there are no general
6896 registers pushed. */
6897 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6898 i < FIRST_PSEUDO_REGISTER; i++)
6900 /* If this is an interrupt handler, and the SZ bit varies,
6901 and we have to push any floating point register, we need
6902 to switch to the correct precision first. */
6903 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6904 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6906 HARD_REG_SET unsaved;
6908 push (FPSCR_REG);
6909 COMPL_HARD_REG_SET (unsaved, *mask);
6910 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6911 skip_fpscr = true;
6913 if (i != PR_REG
6914 && (i != FPSCR_REG || ! skip_fpscr)
6915 && TEST_HARD_REG_BIT (*mask, i))
6917 /* If the ISR has RESBANK attribute assigned, don't push any of
6918 the following registers - R0-R14, MACH, MACL and GBR. */
6919 if (! (sh_cfun_resbank_handler_p ()
6920 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6921 || i == MACH_REG
6922 || i == MACL_REG
6923 || i == GBR_REG)))
6924 push (i);
6928 /* Push banked registers last to improve delay slot opportunities. */
6929 if (interrupt_handler)
6931 bool use_movml = false;
6933 if (TARGET_SH2A)
6935 unsigned int count = 0;
6937 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6938 if (TEST_HARD_REG_BIT (*mask, i))
6939 count++;
6940 else
6941 break;
6943 /* Use movml when all banked registers are pushed. */
6944 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6945 use_movml = true;
6948 if (sh_cfun_resbank_handler_p ())
6949 ; /* Do nothing. */
6950 else if (use_movml)
6952 rtx x, mem, reg, set;
6953 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6955 /* We must avoid scheduling the multiple store insns with other
6956 insns. */
6957 emit_insn (gen_blockage ());
6958 x = gen_movml_push_banked (sp_reg);
6959 x = emit_frame_insn (x);
6960 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6962 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6963 reg = gen_rtx_REG (SImode, i);
6964 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6967 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6968 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6969 emit_insn (gen_blockage ());
6971 else
6972 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6973 if (TEST_HARD_REG_BIT (*mask, i))
6974 push (i);
6977 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6978 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6979 push (PR_REG);
6982 /* Work out the registers which need to be saved, both as a mask and a
6983 count of saved words. Return the count.
6985 If doing a pragma interrupt function, then push all regs used by the
6986 function, and if we call another function (we can tell by looking at PR),
6987 make sure that all the regs it clobbers are safe too. */
6988 static int
6989 calc_live_regs (HARD_REG_SET *live_regs_mask)
6991 unsigned int reg;
6992 tree attrs;
6993 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6994 bool nosave_low_regs;
6996 attrs = DECL_ATTRIBUTES (current_function_decl);
6997 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6998 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6999 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7000 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7002 CLEAR_HARD_REG_SET (*live_regs_mask);
7003 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
7004 && df_regs_ever_live_p (FPSCR_REG))
7005 target_flags &= ~MASK_FPU_SINGLE;
7006 /* If switching to double mode avoids a lot of saves, do that. */
7007 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
7008 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7009 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7010 && (! call_really_used_regs[reg]
7011 || interrupt_handler)
7012 && ++count > 2)
7014 target_flags &= ~MASK_FPU_SINGLE;
7015 break;
7019 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7020 bool pr_live = (pr_initial
7021 ? (!REG_P (pr_initial)
7022 || REGNO (pr_initial) != (PR_REG))
7023 : df_regs_ever_live_p (PR_REG));
7024 /* For SHcompact, if not optimizing, we end up with a memory reference
7025 using the return address pointer for __builtin_return_address even
7026 though there is no actual need to put the PR register on the stack. */
7027 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7029 /* Force PR to be live if the prologue has to call the SHmedia
7030 argument decoder or register saver. */
7031 bool has_call = pr_live;
7033 int count;
7034 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7036 if (reg == PR_REG
7037 ? pr_live
7038 : interrupt_handler
7039 ? (/* Need to save all the regs ever live. */
7040 (df_regs_ever_live_p (reg)
7041 || (call_really_used_regs[reg]
7042 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7043 || reg == PIC_OFFSET_TABLE_REGNUM)
7044 && has_call))
7045 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7046 && reg != RETURN_ADDRESS_POINTER_REGNUM
7047 && reg != T_REG && reg != GBR_REG
7048 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7049 /* Push fpscr only on targets which have an FPU. */
7050 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7051 : (/* Only push those regs which are used and need to be saved. */
7052 (false)
7053 || (df_regs_ever_live_p (reg)
7054 && ((!call_really_used_regs[reg]
7055 && !(reg != PIC_OFFSET_TABLE_REGNUM
7056 && fixed_regs[reg] && call_used_regs[reg]))
7057 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7058 || (crtl->calls_eh_return
7059 && (reg == EH_RETURN_DATA_REGNO (0)
7060 || reg == EH_RETURN_DATA_REGNO (1)
7061 || reg == EH_RETURN_DATA_REGNO (2)
7062 || reg == EH_RETURN_DATA_REGNO (3)))
7063 || ((reg == MACL_REG || reg == MACH_REG)
7064 && df_regs_ever_live_p (reg)
7065 && sh_cfun_attr_renesas_p ())
7068 SET_HARD_REG_BIT (*live_regs_mask, reg);
7069 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7071 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7072 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7074 if (FP_REGISTER_P (reg))
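/* reg ^ 1 flips the low bit, giving the other half of the even/odd FP
   register pair; when saving in double mode, mark the partner live as well
   so the pair is saved and restored together.  */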
7076 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7078 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7079 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7082 else if (XD_REGISTER_P (reg))
7084 /* Must switch to double mode to access these registers. */
7085 target_flags &= ~MASK_FPU_SINGLE;
7089 if (nosave_low_regs && reg == R8_REG)
7090 break;
7093 return count;
7096 /* Code to generate prologue and epilogue sequences */
7098 /* PUSHED is the number of bytes that are being pushed on the
7099 stack for register saves. Return the frame size, padded
7100 appropriately so that the stack stays properly aligned. */
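/* A minimal worked example, assuming a STACK_BOUNDARY of 32 bits so that
   align == 4: get_frame_size () == 10 and pushed == 6 give
   ((10 + 6 + 3) & -4) - 6 == 16 - 6 == 10, so the frame plus the saved
   registers together stay 4-byte aligned.  */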
7101 static HOST_WIDE_INT
7102 rounded_frame_size (int pushed)
7104 HOST_WIDE_INT size = get_frame_size ();
7105 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7107 if (ACCUMULATE_OUTGOING_ARGS)
7108 size += crtl->outgoing_args_size;
7110 return ((size + pushed + align - 1) & -align) - pushed;
7113 /* Expand code for the function prologue. */
7114 void
7115 sh_expand_prologue (void)
7117 int save_flags = target_flags;
7118 tree sp_switch_attr
7119 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7121 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7123 /* We have pretend args if we had an object sent partially in registers
7124 and partially on the stack, e.g. a large structure. */
7125 int pretend_args = crtl->args.pretend_args_size;
7126 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7127 && (NPARM_REGS(SImode)
7128 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7129 pretend_args = 0;
7131 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7132 int stack_usage = pretend_args;
7134 /* Emit the code for SETUP_VARARGS. */
7135 if (cfun->stdarg)
7137 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7139 /* Push arg regs as if they'd been provided by caller in stack. */
7140 for (int i = 0; i < NPARM_REGS(SImode); i++)
7142 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7144 if (i >= (NPARM_REGS(SImode)
7145 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7147 break;
7148 push (rn);
7149 stack_usage += GET_MODE_SIZE (SImode);
7154 /* If we're supposed to switch stacks at function entry, do so now. */
7155 if (sp_switch_attr)
7157 rtx lab, newsrc;
7158 /* The argument specifies a variable holding the address of the
7159 stack the interrupt function should switch to/from at entry/exit. */
7160 tree arg = TREE_VALUE (TREE_VALUE (sp_switch_attr));
7161 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7162 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7164 lab = add_constant (sp_switch, SImode, 0);
7165 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7167 emit_insn (gen_sp_switch_1 (newsrc));
7170 HARD_REG_SET live_regs_mask;
7171 int d = calc_live_regs (&live_regs_mask);
7172 /* ??? Maybe we could save some switching if we can move a mode switch
7173 that already happens to be at the function start into the prologue. */
7174 if (target_flags != save_flags && ! current_function_interrupt)
7175 emit_insn (gen_toggle_sz ());
7177 push_regs (&live_regs_mask, current_function_interrupt);
7178 stack_usage += d;
7180 if (flag_pic && !TARGET_FDPIC
7181 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7182 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7184 if (target_flags != save_flags && ! current_function_interrupt)
7185 emit_insn (gen_toggle_sz ());
7187 target_flags = save_flags;
7189 output_stack_adjust (-rounded_frame_size (d),
7190 stack_pointer_rtx, 0, NULL, true);
7191 stack_usage += rounded_frame_size (d);
7193 if (frame_pointer_needed)
7194 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7196 /* If we are profiling, make sure no instructions are scheduled before
7197 the call to mcount. Similarly, if call instructions get scheduled
7198 before frame-related insns, it will confuse the unwinder, because
7199 SH currently has no unwind info for function epilogues. */
7200 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7201 emit_insn (gen_blockage ());
7203 if (flag_stack_usage_info)
7204 current_function_static_stack_size = stack_usage;
7207 /* Expand code for the function epilogue. */
7208 void
7209 sh_expand_epilogue (bool sibcall_p)
7211 int save_flags = target_flags;
7212 bool fpscr_deferred = false;
7213 int e = sibcall_p ? -1 : 1;
7215 HARD_REG_SET live_regs_mask;
7216 int d = calc_live_regs (&live_regs_mask);
7218 int save_size = d;
7219 int frame_size = rounded_frame_size (d);
7221 if (frame_pointer_needed)
7223 /* We must avoid scheduling the epilogue with previous basic blocks.
7224 See PR/18032 and PR/40313. */
7225 emit_insn (gen_blockage ());
7226 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7227 &live_regs_mask, true);
7229 /* We must avoid moving the stack pointer adjustment past code
7230 which reads from the local frame, else an interrupt could
7231 occur after the SP adjustment and clobber data in the local
7232 frame. */
7233 emit_insn (gen_blockage ());
7234 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7236 else if (frame_size)
7238 /* We must avoid moving the stack pointer adjustment past code
7239 which reads from the local frame, else an interrupt could
7240 occur after the SP adjustment and clobber data in the local
7241 frame. */
7242 emit_insn (gen_blockage ());
7243 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7244 &live_regs_mask, true);
7247 /* Pop all the registers. */
7249 if (target_flags != save_flags && ! current_function_interrupt)
7250 emit_insn (gen_toggle_sz ());
7253 int last_reg;
7255 save_size = 0;
7256 /* For an ISR with RESBANK attribute assigned, don't pop PR
7257 register. */
7258 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7259 && !sh_cfun_resbank_handler_p ())
7261 if (!frame_pointer_needed)
7262 emit_insn (gen_blockage ());
7263 pop (PR_REG);
7266 /* Banked registers are popped first to avoid being scheduled in the
7267 delay slot. RTE switches banks before its delay slot instruction. */
7268 if (current_function_interrupt)
7270 bool use_movml = false;
7272 if (TARGET_SH2A)
7274 unsigned int count = 0;
7276 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7277 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7278 count++;
7279 else
7280 break;
7282 /* Use movml when all banked registers are popped. */
7283 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7284 use_movml = true;
7287 if (sh_cfun_resbank_handler_p ())
7288 ; /* Do nothing. */
7289 else if (use_movml)
7291 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7293 /* We must avoid scheduling the multiple load insns with other
7294 insns. */
7295 emit_insn (gen_blockage ());
7296 emit_insn (gen_movml_pop_banked (sp_reg));
7297 emit_insn (gen_blockage ());
7299 else
7300 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7301 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7302 pop (i);
7304 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7306 else
7307 last_reg = FIRST_PSEUDO_REGISTER;
7309 for (int i = 0; i < last_reg; i++)
7311 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7313 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7314 && hard_reg_set_intersect_p (live_regs_mask,
7315 reg_class_contents[DF_REGS]))
7316 fpscr_deferred = true;
7317 /* For an ISR with RESBANK attribute assigned, don't pop
7318 following registers, R0-R14, MACH, MACL and GBR. */
7319 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7320 && ! (sh_cfun_resbank_handler_p ()
7321 && ((j >= FIRST_GENERAL_REG
7322 && j < LAST_GENERAL_REG)
7323 || j == MACH_REG
7324 || j == MACL_REG
7325 || j == GBR_REG)))
7326 pop (j);
7328 if (j == FIRST_FP_REG && fpscr_deferred)
7329 pop (FPSCR_REG);
7332 if (target_flags != save_flags && ! current_function_interrupt)
7333 emit_insn (gen_toggle_sz ());
7334 target_flags = save_flags;
7336 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7337 stack_pointer_rtx, e, NULL, true);
7339 if (crtl->calls_eh_return)
7340 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7341 EH_RETURN_STACKADJ_RTX));
7343 /* Switch back to the normal stack if necessary. */
7344 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7345 emit_insn (gen_sp_switch_2 ());
7347 /* Tell flow the insn that pops PR isn't dead. */
7348 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7349 emit_use (gen_rtx_REG (SImode, PR_REG));
7352 /* Emit code to change the current function's return address to RA.
7353 TEMP is available as a scratch register, if needed. */
7354 void
7355 sh_set_return_address (rtx ra, rtx tmp)
7357 HARD_REG_SET live_regs_mask;
7358 int d = calc_live_regs (&live_regs_mask);
7360 /* If pr_reg isn't live, we can set it directly. */
7361 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7363 rtx rr = gen_rtx_REG (SImode, PR_REG);
7364 emit_insn (GEN_MOV (rr, ra));
7365 /* Tell flow the register for return isn't dead. */
7366 emit_use (rr);
7367 return;
7370 int pr_offset = rounded_frame_size (d);
7372 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7374 if (frame_pointer_needed)
7375 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7376 else
7377 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7379 tmp = gen_frame_mem (Pmode, tmp);
7380 emit_insn (GEN_MOV (tmp, ra));
7381 /* Tell flow this store isn't dead. */
7382 emit_use (tmp);
7385 /* Clear variables at function end. */
7386 static void
7387 sh_output_function_epilogue (FILE *)
7391 static rtx
7392 sh_builtin_saveregs (void)
7394 /* First unnamed integer register. */
7395 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7396 /* Number of integer registers we need to save. */
7397 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7398 /* First unnamed SFmode float reg. */
7399 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7400 /* Number of SFmode float regs to save. */
7401 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7402 rtx regbuf, fpregs;
7403 int bufsize, regno;
7404 alias_set_type alias_set;
7406 if (!TARGET_FPU_ANY)
7408 error ("__builtin_saveregs not supported by this subtarget");
7409 return const0_rtx;
7412 /* Allocate block of memory for the regs. */
7413 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7414 Or can assign_stack_local accept a 0 SIZE argument? */
7415 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7417 if (n_floatregs & 1)
7419 rtx addr;
7421 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7422 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7423 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7424 regbuf = change_address (regbuf, BLKmode, addr);
7426 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7428 rtx addr, mask;
7430 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7431 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7432 XEXP (regbuf, 0), 4));
7433 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7434 emit_insn (gen_andsi3 (addr, addr, mask));
7435 regbuf = change_address (regbuf, BLKmode, addr);
7437 else
7438 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7439 alias_set = get_varargs_alias_set ();
7440 set_mem_alias_set (regbuf, alias_set);
7442 /* Save int args.
7443 This is optimized to only save the regs that are necessary. Explicitly
7444 named args need not be saved. */
7445 if (n_intregs > 0)
7446 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7447 adjust_address (regbuf, BLKmode,
7448 n_floatregs * UNITS_PER_WORD),
7449 n_intregs);
7451 /* Save float args.
7452 This is optimized to only save the regs that are necessary. Explicitly
7453 named args need not be saved.
7454 We explicitly build a pointer to the buffer because it halves the insn
7455 count when not optimizing (otherwise the pointer is built for each reg
7456 saved).
7457 We emit the moves in reverse order so that we can use predecrement. */
7459 fpregs = copy_to_mode_reg (Pmode,
7460 plus_constant (Pmode, XEXP (regbuf, 0),
7461 n_floatregs * UNITS_PER_WORD));
7462 if (TARGET_FPU_DOUBLE)
7464 rtx mem;
7465 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7467 emit_insn (gen_addsi3 (fpregs, fpregs,
7468 GEN_INT (-2 * UNITS_PER_WORD)));
7469 mem = change_address (regbuf, DFmode, fpregs);
7470 emit_move_insn (mem,
7471 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7473 regno = first_floatreg;
7474 if (regno & 1)
7476 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7477 mem = change_address (regbuf, SFmode, fpregs);
7478 emit_move_insn (mem,
7479 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7480 + regno - SH_REG_MSW_OFFSET));
7483 else
7484 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7486 rtx mem;
7488 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7489 mem = change_address (regbuf, SFmode, fpregs);
7490 emit_move_insn (mem,
7491 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7494 /* Return the address of the regbuf. */
7495 return XEXP (regbuf, 0);
7498 /* Define the `__builtin_va_list' type for the ABI. */
7499 static tree
7500 sh_build_builtin_va_list (void)
7502 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7503 tree record, type_decl;
7505 if ((! TARGET_SH2E && ! TARGET_SH4)
7506 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7507 return ptr_type_node;
7509 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7510 type_decl = build_decl (BUILTINS_LOCATION,
7511 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7513 f_next_o = build_decl (BUILTINS_LOCATION,
7514 FIELD_DECL, get_identifier ("__va_next_o"),
7515 ptr_type_node);
7516 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7517 FIELD_DECL,
7518 get_identifier ("__va_next_o_limit"),
7519 ptr_type_node);
7520 f_next_fp = build_decl (BUILTINS_LOCATION,
7521 FIELD_DECL, get_identifier ("__va_next_fp"),
7522 ptr_type_node);
7523 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7524 FIELD_DECL,
7525 get_identifier ("__va_next_fp_limit"),
7526 ptr_type_node);
7527 f_next_stack = build_decl (BUILTINS_LOCATION,
7528 FIELD_DECL, get_identifier ("__va_next_stack"),
7529 ptr_type_node);
7531 DECL_FIELD_CONTEXT (f_next_o) = record;
7532 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7533 DECL_FIELD_CONTEXT (f_next_fp) = record;
7534 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7535 DECL_FIELD_CONTEXT (f_next_stack) = record;
7537 TYPE_STUB_DECL (record) = type_decl;
7538 TYPE_NAME (record) = type_decl;
7539 TYPE_FIELDS (record) = f_next_o;
7540 DECL_CHAIN (f_next_o) = f_next_o_limit;
7541 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7542 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7543 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7545 layout_type (record);
7547 return record;
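/* Illustration only (not part of the compiler): the record built above
   corresponds roughly to the following C structure, with the field roles
   as sh_va_start below initializes them:

     struct __va_list_tag
     {
       void *__va_next_o;        // next unnamed integer arg save slot
       void *__va_next_o_limit;  // end of the integer register save area
       void *__va_next_fp;       // next unnamed FP arg save slot
       void *__va_next_fp_limit; // end of the FP register save area
       void *__va_next_stack;    // next argument passed on the stack
     };  */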
7550 /* Implement `va_start' for varargs and stdarg. */
7551 static void
7552 sh_va_start (tree valist, rtx nextarg)
7554 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7555 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7556 tree t, u;
7557 int nfp, nint;
7559 if ((! TARGET_SH2E && ! TARGET_SH4)
7560 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7562 std_expand_builtin_va_start (valist, nextarg);
7563 return;
7566 f_next_o = TYPE_FIELDS (va_list_type_node);
7567 f_next_o_limit = DECL_CHAIN (f_next_o);
7568 f_next_fp = DECL_CHAIN (f_next_o_limit);
7569 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7570 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7572 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7573 NULL_TREE);
7574 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7575 valist, f_next_o_limit, NULL_TREE);
7576 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7577 NULL_TREE);
7578 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7579 valist, f_next_fp_limit, NULL_TREE);
7580 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7581 valist, f_next_stack, NULL_TREE);
7583 /* Call __builtin_saveregs. */
7584 u = make_tree (sizetype, expand_builtin_saveregs ());
7585 u = fold_convert (ptr_type_node, u);
7586 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7587 TREE_SIDE_EFFECTS (t) = 1;
7588 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7590 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7591 if (nfp < 8)
7592 nfp = 8 - nfp;
7593 else
7594 nfp = 0;
7595 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7596 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7597 TREE_SIDE_EFFECTS (t) = 1;
7598 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7600 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7601 TREE_SIDE_EFFECTS (t) = 1;
7602 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7604 nint = crtl->args.info.arg_count[SH_ARG_INT];
7605 if (nint < 4)
7606 nint = 4 - nint;
7607 else
7608 nint = 0;
7609 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7610 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7611 TREE_SIDE_EFFECTS (t) = 1;
7612 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7614 u = make_tree (ptr_type_node, nextarg);
7615 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7616 TREE_SIDE_EFFECTS (t) = 1;
7617 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7620 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7621 member, return it. */
7622 static tree
7623 find_sole_member (tree type)
7625 tree field, member = NULL_TREE;
7627 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7629 if (TREE_CODE (field) != FIELD_DECL)
7630 continue;
7631 if (!DECL_SIZE (field))
7632 return NULL_TREE;
7633 if (integer_zerop (DECL_SIZE (field)))
7634 continue;
7635 if (member)
7636 return NULL_TREE;
7637 member = field;
7639 return member;
7642 /* Implement `va_arg'. */
7643 static tree
7644 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7645 gimple_seq *post_p ATTRIBUTE_UNUSED)
7647 tree tmp;
7648 tree addr, lab_over = NULL, result = NULL;
7649 tree eff_type;
7651 const bool pass_by_ref =
7652 !VOID_TYPE_P (type)
7653 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7655 if (pass_by_ref)
7656 type = build_pointer_type (type);
7658 HOST_WIDE_INT size = int_size_in_bytes (type);
7659 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7660 tree pptr_type_node = build_pointer_type (ptr_type_node);
7662 if ((TARGET_SH2E || TARGET_SH4)
7663 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7665 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7666 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7667 tree lab_false;
7668 tree member;
7670 f_next_o = TYPE_FIELDS (va_list_type_node);
7671 f_next_o_limit = DECL_CHAIN (f_next_o);
7672 f_next_fp = DECL_CHAIN (f_next_o_limit);
7673 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7674 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7676 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7677 NULL_TREE);
7678 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7679 valist, f_next_o_limit, NULL_TREE);
7680 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7681 valist, f_next_fp, NULL_TREE);
7682 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7683 valist, f_next_fp_limit, NULL_TREE);
7684 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7685 valist, f_next_stack, NULL_TREE);
7687 /* Structures with a single member with a distinct mode are passed
7688 like their member. This is relevant if the latter has a REAL_TYPE
7689 or COMPLEX_TYPE type. */
7690 eff_type = type;
7691 while (TREE_CODE (eff_type) == RECORD_TYPE
7692 && (member = find_sole_member (eff_type))
7693 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7694 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7695 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7697 tree field_type = TREE_TYPE (member);
7699 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7700 eff_type = field_type;
7701 else
7703 gcc_assert ((TYPE_ALIGN (eff_type)
7704 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7705 || (TYPE_ALIGN (eff_type)
7706 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7707 break;
7711 bool pass_as_float;
7712 if (TARGET_FPU_DOUBLE)
7714 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7715 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7716 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7717 && size <= 16));
7719 else
7721 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7724 addr = create_tmp_var (pptr_type_node);
7725 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7726 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7728 valist = build_simple_mem_ref (addr);
7730 if (pass_as_float)
7732 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7733 tree cmp;
7734 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7736 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7737 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7739 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7740 tmp = next_fp_limit;
7741 if (size > 4 && !is_double)
7742 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7743 tmp = build2 (GE_EXPR, boolean_type_node,
7744 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7745 cmp = build3 (COND_EXPR, void_type_node, tmp,
7746 build1 (GOTO_EXPR, void_type_node,
7747 unshare_expr (lab_false)), NULL_TREE);
7748 if (!is_double)
7749 gimplify_and_add (cmp, pre_p);
7751 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7752 || (is_double || size == 16))
7754 tmp = fold_convert (sizetype, next_fp_tmp);
7755 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7756 size_int (UNITS_PER_WORD));
7757 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7758 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7760 if (is_double)
7761 gimplify_and_add (cmp, pre_p);
7763 #ifdef FUNCTION_ARG_SCmode_WART
7764 if (TYPE_MODE (eff_type) == SCmode
7765 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7767 tree subtype = TREE_TYPE (eff_type);
7768 tree real, imag;
7770 imag
7771 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7772 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7774 real
7775 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7776 real = get_initialized_tmp_var (real, pre_p, NULL);
7778 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7779 if (type != eff_type)
7780 result = build1 (VIEW_CONVERT_EXPR, type, result);
7781 result = get_initialized_tmp_var (result, pre_p, NULL);
7783 #endif /* FUNCTION_ARG_SCmode_WART */
7785 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7786 gimplify_and_add (tmp, pre_p);
7788 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7789 gimplify_and_add (tmp, pre_p);
7791 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7792 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7793 gimplify_assign (unshare_expr (next_fp_tmp),
7794 unshare_expr (valist), pre_p);
7796 gimplify_assign (unshare_expr (valist),
7797 unshare_expr (next_fp_tmp), post_p);
7798 valist = next_fp_tmp;
7800 else
7802 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7803 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7804 unshare_expr (next_o_limit));
7805 tmp = build3 (COND_EXPR, void_type_node, tmp,
7806 build1 (GOTO_EXPR, void_type_node,
7807 unshare_expr (lab_false)),
7808 NULL_TREE);
7809 gimplify_and_add (tmp, pre_p);
7811 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7812 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7814 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7815 gimplify_and_add (tmp, pre_p);
7817 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7818 gimplify_and_add (tmp, pre_p);
7820 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7821 gimplify_assign (unshare_expr (next_o),
7822 unshare_expr (next_o_limit), pre_p);
7824 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7825 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7828 if (!result)
7830 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7831 gimplify_and_add (tmp, pre_p);
7835 /* ??? In va-sh.h, there had been code to make values larger than
7836 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7838 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7839 if (result)
7841 gimplify_assign (result, tmp, pre_p);
7842 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7843 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7844 gimplify_and_add (tmp, pre_p);
7846 else
7847 result = tmp;
7849 if (pass_by_ref)
7850 result = build_va_arg_indirect_ref (result);
7852 return result;
7855 /* 64 bit floating point memory transfers are paired single precision loads
7856 or stores. So DWARF information needs fixing in little endian (unless
7857 PR=SZ=1 in FPSCR). */
7859 sh_dwarf_register_span (rtx reg)
7861 unsigned regno = REGNO (reg);
7863 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7864 return NULL_RTX;
7866 return
7867 gen_rtx_PARALLEL (VOIDmode,
7868 gen_rtvec (2,
7869 gen_rtx_REG (SFmode, regno + 1),
7870 gen_rtx_REG (SFmode, regno)));
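/* For illustration (assuming the usual fr0/fr1 naming of the FP registers):
   a little endian DFmode value in the pair starting at fr0 is described to
   DWARF as the two SFmode pieces (fr1, fr0) by the PARALLEL built here.  */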
7873 static machine_mode
7874 sh_promote_function_mode (const_tree type, machine_mode mode,
7875 int *punsignedp, const_tree funtype,
7876 int for_return)
7878 if (sh_promote_prototypes (funtype))
7879 return promote_mode (type, mode, punsignedp);
7880 else
7881 return default_promote_function_mode (type, mode, punsignedp, funtype,
7882 for_return);
7885 static bool
7886 sh_promote_prototypes (const_tree type)
7888 if (TARGET_HITACHI)
7889 return false;
7890 if (! type)
7891 return true;
7892 return ! sh_attr_renesas_p (type);
7895 static bool
7896 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7897 const_tree type, bool named ATTRIBUTE_UNUSED)
7899 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7901 if (targetm.calls.must_pass_in_stack (mode, type))
7902 return true;
7904 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7905 wants to know about pass-by-reference semantics for incoming
7906 arguments. */
7907 if (! cum)
7908 return false;
7910 return false;
7913 static bool
7914 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7915 const_tree type, bool named ATTRIBUTE_UNUSED)
7917 /* ??? How can it possibly be correct to return true only on the
7918 caller side of the equation? Is there someplace else in the
7919 sh backend that's magically producing the copies? */
7920 return (get_cumulative_args (cum)->outgoing
7921 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7922 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7925 static sh_arg_class
7926 get_sh_arg_class (machine_mode mode)
7928 if (TARGET_FPU_ANY && mode == SFmode)
7929 return SH_ARG_FLOAT;
7931 if (TARGET_FPU_DOUBLE
7932 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7933 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7934 return SH_ARG_FLOAT;
7936 return SH_ARG_INT;
7939 /* Round a register number up to a proper boundary for an arg of mode
7940 MODE.
7941 The SH doesn't care about double alignment, so we only
7942 round doubles to even regs when asked to explicitly. */
7943 static int
7944 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7946 /* FIXME: This used to be a macro and has been copy pasted into this
7947 function as is. Make this more readable. */
7948 return
7949 (((TARGET_ALIGN_DOUBLE
7950 || (TARGET_FPU_DOUBLE
7951 && (mode == DFmode || mode == DCmode)
7952 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7953 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7954 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7955 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7956 : cum.arg_count[(int) get_sh_arg_class (mode)]);
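/* Worked example (illustrative): on a double precision FPU target, after a
   single SFmode argument has been passed (arg_count == 1), a following
   DFmode argument is rounded up to slot 2 so that the double starts on an
   even register pair; the skipped single precision slot is what
   sh_function_arg_advance later records in free_single_fp_reg for the
   Renesas ABI.  */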
7959 /* Return true if arg of the specified mode should be passed in a register
7960 or false otherwise. */
7961 static bool
7962 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7963 const_tree type)
7965 /* FIXME: This used to be a macro and has been copy pasted into this
7966 function as is. Make this more readable. */
7967 return
7968 ((type == 0
7969 || (! TREE_ADDRESSABLE (type)
7970 && (! (TARGET_HITACHI || cum.renesas_abi)
7971 || ! (AGGREGATE_TYPE_P (type)
7972 || (!TARGET_FPU_ANY
7973 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7974 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7975 && ! cum.force_mem
7976 && (TARGET_SH2E
7977 ? ((mode) == BLKmode
7978 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7979 + int_size_in_bytes (type))
7980 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7981 : ((sh_round_reg (cum, mode)
7982 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
7983 <= NPARM_REGS (mode)))
7984 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7987 static int
7988 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
7989 tree type, bool named ATTRIBUTE_UNUSED)
7991 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7992 int words = 0;
7994 if (sh_pass_in_reg_p (*cum, mode, type)
7995 && !TARGET_FPU_DOUBLE
7996 && (sh_round_reg (*cum, mode)
7997 + (mode != BLKmode
7998 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
7999 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8000 > NPARM_REGS (mode)))
8001 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8003 return words * UNITS_PER_WORD;
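/* Rough illustration, assuming a target without FPU argument registers
   (so the !TARGET_FPU_DOUBLE path applies) and the usual four integer
   argument registers: a DImode argument arriving when three of those
   registers are already in use needs two words but only one register is
   left, so 4 bytes are passed in the last register and the remainder goes
   on the stack.  */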
8007 /* Define where to put the arguments to a function.
8008 Value is zero to push the argument on the stack,
8009 or a hard register in which to store the argument.
8011 MODE is the argument's machine mode.
8012 TYPE is the data type of the argument (as a tree).
8013 This is null for libcalls where that information may
8014 not be available.
8015 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8016 the preceding args and about the function being called.
8017 NAMED is nonzero if this argument is a named parameter
8018 (otherwise it is an extra parameter matching an ellipsis).
8020 On SH the first args are normally in registers
8021 and the rest are pushed. Any arg that starts within the first
8022 NPARM_REGS words is at least partially passed in a register unless
8023 its data type forbids. */
8024 static rtx
8025 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
8026 const_tree type, bool named)
8028 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8030 if (mode == VOIDmode)
8031 return ca->renesas_abi ? const1_rtx : const0_rtx;
8033 if (sh_pass_in_reg_p (*ca, mode, type)
8034 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8036 int regno;
8038 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8039 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8041 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8042 gen_rtx_REG (SFmode,
8043 BASE_ARG_REG (mode)
8044 + (sh_round_reg (*ca, mode) ^ 1)),
8045 const0_rtx);
8046 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8047 gen_rtx_REG (SFmode,
8048 BASE_ARG_REG (mode)
8049 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8050 GEN_INT (4));
8051 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8054 /* If the alignment of a DF value causes an SF register to be
8055 skipped, we will use that skipped register for the next SF
8056 value. */
8057 if ((TARGET_HITACHI || ca->renesas_abi)
8058 && ca->free_single_fp_reg
8059 && mode == SFmode)
8060 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8062 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8063 ^ (mode == SFmode && TARGET_SH4
8064 && TARGET_LITTLE_ENDIAN
8065 && ! TARGET_HITACHI && ! ca->renesas_abi);
8066 return gen_rtx_REG (mode, regno);
8070 return NULL_RTX;
8073 /* Update the data in CUM to advance over an argument
8074 of mode MODE and data type TYPE.
8075 (TYPE is null for libcalls where that information may not be
8076 available.) */
8077 static void
8078 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
8079 const_tree type, bool named ATTRIBUTE_UNUSED)
8081 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8083 if (ca->force_mem)
8084 ca->force_mem = false;
8086 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8088 /* Note that we've used the skipped register. */
8089 if (mode == SFmode && ca->free_single_fp_reg)
8091 ca->free_single_fp_reg = 0;
8092 return;
8094 /* When we have a DF after an SF, there's an SF register that gets
8095 skipped in order to align the DF value. We note this skipped
8096 register, because the next SF value will use it, and not the
8097 SF that follows the DF. */
8098 if (mode == DFmode
8099 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8101 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8102 + BASE_ARG_REG (mode));
8106 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8107 || sh_pass_in_reg_p (*ca, mode, type))
8108 (ca->arg_count[(int) get_sh_arg_class (mode)]
8109 = (sh_round_reg (*ca, mode)
8110 + (mode == BLKmode
8111 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8112 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
8115 /* The Renesas calling convention doesn't quite fit into this scheme since
8116 the address is passed like an invisible argument, but one that is always
8117 passed in memory. */
8118 static rtx
8119 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8121 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8122 return NULL_RTX;
8123 return gen_rtx_REG (Pmode, 2);
8126 /* Worker function for TARGET_FUNCTION_VALUE.
8128 For the SH, this is like LIBCALL_VALUE, except that we must change the
8129 mode like PROMOTE_MODE does.
8130 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8131 tested here has to be kept in sync with the one in
8132 explow.c:promote_mode. */
8133 static rtx
8134 sh_function_value (const_tree valtype,
8135 const_tree fn_decl_or_type,
8136 bool outgoing ATTRIBUTE_UNUSED)
8138 if (fn_decl_or_type
8139 && !DECL_P (fn_decl_or_type))
8140 fn_decl_or_type = NULL;
8142 return gen_rtx_REG (
8143 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8144 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8145 && (TREE_CODE (valtype) == INTEGER_TYPE
8146 || TREE_CODE (valtype) == ENUMERAL_TYPE
8147 || TREE_CODE (valtype) == BOOLEAN_TYPE
8148 || TREE_CODE (valtype) == REAL_TYPE
8149 || TREE_CODE (valtype) == OFFSET_TYPE))
8150 && sh_promote_prototypes (fn_decl_or_type)
8151 ? SImode : TYPE_MODE (valtype)),
8152 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
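/* For illustration: with promoted prototypes in effect, a function declared
   as returning 'short' (a sub-word integer type here) has its return value
   widened and returned as an SImode value in the usual return register,
   rather than as HImode.  */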
8155 /* Worker function for TARGET_LIBCALL_VALUE. */
8156 static rtx
8157 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8159 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8162 /* Return true if N is a possible register number of function value. */
8163 static bool
8164 sh_function_value_regno_p (const unsigned int regno)
8166 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8169 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8170 static bool
8171 sh_return_in_memory (const_tree type, const_tree fndecl)
8173 return TYPE_MODE (type) == BLKmode
8174 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8175 && TREE_CODE (type) == RECORD_TYPE);
8178 /* We actually emit the code in sh_expand_prologue. We used to use
8179 a static variable to flag that we need to emit this code, but that
8180 doesn't work when inlining, when functions are deferred and then emitted
8181 later. Fortunately, we already have two flags that are part of struct
8182 function that tell if a function uses varargs or stdarg. */
8183 static void
8184 sh_setup_incoming_varargs (cumulative_args_t ca,
8185 machine_mode mode,
8186 tree type,
8187 int *pretend_arg_size,
8188 int second_time ATTRIBUTE_UNUSED)
8190 gcc_assert (cfun->stdarg);
8191 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8193 int named_parm_regs, anon_parm_regs;
8195 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
8196 + (mode == BLKmode
8197 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8198 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
8199 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8200 if (anon_parm_regs > 0)
8201 *pretend_arg_size = anon_parm_regs * 4;
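/* For illustration, assuming the usual four integer argument registers
   r4..r7: for a declaration such as
     int f (int a, ...);
   one register worth of named parameters is counted, so anon_parm_regs is
   3 and 12 bytes of anonymous register arguments are saved as pretend
   arguments in front of the frame.  */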
8205 static bool
8206 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8208 return false;
8211 static bool
8212 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8214 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8216 return ! (TARGET_HITACHI || ca->renesas_abi);
8220 /* Define the offset between two registers, one to be eliminated, and
8221 the other its replacement, at the start of a routine. */
8223 initial_elimination_offset (int from, int to)
8225 const int regs_saved_rounding = 0;
8226 int save_flags = target_flags;
8227 HARD_REG_SET live_regs_mask;
8229 int regs_saved = calc_live_regs (&live_regs_mask);
8231 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8232 target_flags = save_flags;
8234 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8236 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8237 return total_saved_regs_space + total_auto_space;
8239 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8240 return total_saved_regs_space + total_auto_space;
8242 /* Initial gap between fp and sp is 0. */
8243 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8244 return 0;
8246 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8247 return rounded_frame_size (0);
8249 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8250 return rounded_frame_size (0);
8252 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8253 && (to == HARD_FRAME_POINTER_REGNUM
8254 || to == STACK_POINTER_REGNUM));
8255 return total_auto_space;
8258 /* Parse the -mfixed-range= option string. */
8259 void
8260 sh_fix_range (const char *const_str)
8262 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8263 REG2 are either register names or register numbers. The effect
8264 of this option is to mark the registers in the range from REG1 to
8265 REG2 as ``fixed'' so they won't be used by the compiler. */
8267 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8269 while (1)
8271 char* dash = strchr (str, '-');
8272 if (!dash)
8274 warning (0, "value of -mfixed-range must have form REG1-REG2");
8275 return;
8277 *dash = '\0';
8278 char* comma = strchr (dash + 1, ',');
8279 if (comma)
8280 *comma = '\0';
8282 int first = decode_reg_name (str);
8283 if (first < 0)
8285 warning (0, "unknown register name: %s", str);
8286 return;
8289 int last = decode_reg_name (dash + 1);
8290 if (last < 0)
8292 warning (0, "unknown register name: %s", dash + 1);
8293 return;
8296 *dash = '-';
8298 if (first > last)
8300 warning (0, "%s-%s is an empty range", str, dash + 1);
8301 return;
8304 for (int i = first; i <= last; ++i)
8305 fixed_regs[i] = call_used_regs[i] = 1;
8307 if (!comma)
8308 break;
8310 *comma = ',';
8311 str = comma + 1;
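/* Usage illustration (register names are examples only): an option such as
     -mfixed-range=r8-r10,r12-r12
   marks r8, r9, r10 and r12 as fixed and call-used, so the register
   allocator will not use them.  */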
8315 /* Insert any deferred function attributes from earlier pragmas. */
8316 static void
8317 sh_insert_attributes (tree node, tree *attributes)
8319 if (TREE_CODE (node) != FUNCTION_DECL)
8320 return;
8322 /* We are only interested in fields. */
8323 if (!DECL_P (node))
8324 return;
8326 /* Append the attributes to the deferred attributes. */
8327 *sh_deferred_function_attributes_tail = *attributes;
8328 tree attrs = sh_deferred_function_attributes;
8329 if (!attrs)
8330 return;
8332 /* Some attributes imply or require the interrupt attribute. */
8333 if (!lookup_attribute ("interrupt_handler", attrs)
8334 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8336 /* If we have a trapa_handler, but no interrupt_handler attribute,
8337 insert an interrupt_handler attribute. */
8338 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8339 /* We can't use sh_pr_interrupt here because that's not in the
8340 java frontend. */
8341 attrs
8342 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8343 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8344 if the interrupt attribute is missing, we ignore the attribute
8345 and warn. */
8346 else if (lookup_attribute ("sp_switch", attrs)
8347 || lookup_attribute ("trap_exit", attrs)
8348 || lookup_attribute ("nosave_low_regs", attrs)
8349 || lookup_attribute ("resbank", attrs))
8351 tree *tail;
8353 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8355 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8356 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8357 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8358 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8359 warning (OPT_Wattributes,
8360 "%qE attribute only applies to interrupt functions",
8361 TREE_PURPOSE (attrs));
8362 else
8364 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8365 NULL_TREE);
8366 tail = &TREE_CHAIN (*tail);
8369 attrs = *attributes;
8373 /* Install the processed list. */
8374 *attributes = attrs;
8376 /* Clear deferred attributes. */
8377 sh_deferred_function_attributes = NULL_TREE;
8378 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8380 return;
8383 /*------------------------------------------------------------------------------
8384 Target specific attributes
8385 Supported attributes are:
8387 * interrupt_handler
8388 Specifies this function is an interrupt handler.
8390 * trapa_handler
8391 Like interrupt_handler, but don't save all registers.
8393 * sp_switch
8394 Specifies an alternate stack for an interrupt handler to run on.
8396 * trap_exit
8397 Use a trapa to exit an interrupt function instead of rte.
8399 * nosave_low_regs
8400 Don't save r0..r7 in an interrupt handler function.
8401 This is useful on SH3* and SH4*, which have a separate set of low
8402 regs for user and privileged modes.
8403 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8404 those that run with interrupts disabled and thus can't be
8405 interrupted themselves).
8407 * renesas
8408 Use Renesas calling/layout conventions (functions and structures).
8410 * resbank
8411 In case of an interrupt handler function, use a register bank to
8412 save registers R0-R14, MACH, MACL, GBR and PR.
8413 This is available only on SH2A targets.
8415 * function_vector
8416 Declares a function to be called using the TBR relative addressing
8417 mode. Takes an argument that specifies the slot number in the table
8418 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
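/* Example (hypothetical user code, for illustration only) of how these
   attributes might be written in a source file targeting SH2A; the
   function names, the stack symbol and the numeric arguments below are
   made up:

     void __attribute__ ((interrupt_handler, sp_switch ("isr_stack"),
                          trap_exit (11)))
     uart_isr (void);

     void __attribute__ ((interrupt_handler, resbank))
     timer_isr (void);

     void __attribute__ ((function_vector (18)))
     tbr_callee (void);
*/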
8421 /* Handle a 'resbank' attribute. */
8422 static tree
8423 sh_handle_resbank_handler_attribute (tree * node, tree name,
8424 tree args ATTRIBUTE_UNUSED,
8425 int flags ATTRIBUTE_UNUSED,
8426 bool * no_add_attrs)
8428 if (!TARGET_SH2A)
8430 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8431 name);
8432 *no_add_attrs = true;
8434 if (TREE_CODE (*node) != FUNCTION_DECL)
8436 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8437 name);
8438 *no_add_attrs = true;
8441 return NULL_TREE;
8444 /* Handle an "interrupt_handler" attribute; arguments as in
8445 struct attribute_spec.handler. */
8446 static tree
8447 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8448 tree args ATTRIBUTE_UNUSED,
8449 int flags ATTRIBUTE_UNUSED,
8450 bool *no_add_attrs)
8452 if (TREE_CODE (*node) != FUNCTION_DECL)
8454 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8455 name);
8456 *no_add_attrs = true;
8459 return NULL_TREE;
8462 /* Handle an 'function_vector' attribute; arguments as in
8463 struct attribute_spec.handler. */
8464 static tree
8465 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8466 tree args ATTRIBUTE_UNUSED,
8467 int flags ATTRIBUTE_UNUSED,
8468 bool * no_add_attrs)
8470 if (!TARGET_SH2A)
8472 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8473 name);
8474 *no_add_attrs = true;
8476 else if (TREE_CODE (*node) != FUNCTION_DECL)
8478 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8479 name);
8480 *no_add_attrs = true;
8482 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8484 /* The argument must be a constant integer. */
8485 warning (OPT_Wattributes,
8486 "%qE attribute argument not an integer constant",
8487 name);
8488 *no_add_attrs = true;
8490 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8492 /* The argument value must be between 0 and 255. */
8493 warning (OPT_Wattributes,
8494 "%qE attribute argument should be between 0 and 255",
8495 name);
8496 *no_add_attrs = true;
8498 return NULL_TREE;
8501 /* Returns true if current function has been assigned the attribute
8502 'function_vector'. */
8503 bool
8504 sh2a_is_function_vector_call (rtx x)
8506 if (GET_CODE (x) == SYMBOL_REF
8507 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8509 tree tr = SYMBOL_REF_DECL (x);
8511 if (sh2a_function_vector_p (tr))
8512 return true;
8515 return false;
8518 /* Returns the function vector number, if the attribute
8519 'function_vector' is assigned, otherwise returns zero. */
8521 sh2a_get_function_vector_number (rtx x)
8523 if ((GET_CODE (x) == SYMBOL_REF)
8524 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8526 tree t = SYMBOL_REF_DECL (x);
8528 if (TREE_CODE (t) != FUNCTION_DECL)
8529 return 0;
8531 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8532 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8533 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8535 return 0;
8537 else
8538 return 0;
8541 /* Handle an "sp_switch" attribute; arguments as in
8542 struct attribute_spec.handler. */
8543 static tree
8544 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8545 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8547 if (TREE_CODE (*node) != FUNCTION_DECL)
8549 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8550 name);
8551 *no_add_attrs = true;
8553 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8555 /* The argument must be a constant string. */
8556 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8557 name);
8558 *no_add_attrs = true;
8561 return NULL_TREE;
8564 /* Handle an "trap_exit" attribute; arguments as in
8565 struct attribute_spec.handler. */
8566 static tree
8567 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8568 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8570 if (TREE_CODE (*node) != FUNCTION_DECL)
8572 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8573 name);
8574 *no_add_attrs = true;
8576 /* The argument specifies a trap number to be used in a trapa instruction
8577 at function exit (instead of an rte instruction). */
8578 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8580 /* The argument must be a constant integer. */
8581 warning (OPT_Wattributes, "%qE attribute argument not an "
8582 "integer constant", name);
8583 *no_add_attrs = true;
8586 return NULL_TREE;
8589 static tree
8590 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8591 tree name ATTRIBUTE_UNUSED,
8592 tree args ATTRIBUTE_UNUSED,
8593 int flags ATTRIBUTE_UNUSED,
8594 bool *no_add_attrs ATTRIBUTE_UNUSED)
8596 return NULL_TREE;
8599 /* True if __attribute__((renesas)) or -mrenesas. */
8600 bool
8601 sh_attr_renesas_p (const_tree td)
8603 if (TARGET_HITACHI)
8604 return true;
8605 if (td == NULL_TREE)
8606 return false;
8607 if (DECL_P (td))
8608 td = TREE_TYPE (td);
8609 if (td == error_mark_node)
8610 return false;
8611 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8614 /* True if __attribute__((renesas)) or -mrenesas, for the current
8615 function. */
8616 bool
8617 sh_cfun_attr_renesas_p (void)
8619 return sh_attr_renesas_p (current_function_decl);
8622 /* Returns true if the current function has the "interrupt_handler"
8623 attribute set. */
8624 bool
8625 sh_cfun_interrupt_handler_p (void)
8627 return (lookup_attribute ("interrupt_handler",
8628 DECL_ATTRIBUTES (current_function_decl))
8629 != NULL_TREE);
8632 /* Returns true if FUNC has been assigned the attribute
8633 "function_vector". */
8634 bool
8635 sh2a_function_vector_p (tree func)
8637 if (TREE_CODE (func) != FUNCTION_DECL)
8638 return false;
8640 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8641 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8642 return true;
8644 return false;
8647 /* Returns true if the current function has the resbank and interrupt_handler attributes set on an SH2A target. */
8648 bool
8649 sh_cfun_resbank_handler_p (void)
8651 return ((lookup_attribute ("resbank",
8652 DECL_ATTRIBUTES (current_function_decl))
8653 != NULL_TREE)
8654 && (lookup_attribute ("interrupt_handler",
8655 DECL_ATTRIBUTES (current_function_decl))
8656 != NULL_TREE) && TARGET_SH2A);
8659 /* Returns true if the current function has a "trap_exit" attribute set. */
8660 bool
8661 sh_cfun_trap_exit_p (void)
8663 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8664 != NULL_TREE;
8667 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8668 static const char *
8669 sh_check_pch_target_flags (int old_flags)
8671 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8672 | MASK_SH_E | MASK_HARD_SH4
8673 | MASK_FPU_SINGLE | MASK_SH4))
8674 return _("created and used with different architectures / ABIs");
8675 if ((old_flags ^ target_flags) & MASK_HITACHI)
8676 return _("created and used with different ABIs");
8677 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8678 return _("created and used with different endianness");
8679 return NULL;
8682 /* Predicates used by the templates. */
8684 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8685 Used only in general_movsrc_operand. */
8686 bool
8687 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8689 switch (REGNO (op))
8691 case PR_REG:
8692 case MACL_REG:
8693 case MACH_REG:
8694 return true;
8696 return false;
8699 /* Returns true if OP is a floating point value with value 0.0. */
8700 bool
8701 fp_zero_operand (rtx op)
8703 if (GET_MODE (op) != SFmode)
8704 return false;
8706 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8707 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8710 /* Returns true if OP is a floating point value with value 1.0. */
8711 bool
8712 fp_one_operand (rtx op)
8714 if (GET_MODE (op) != SFmode)
8715 return false;
8717 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8720 /* Return the TLS type for TLS symbols. */
8721 enum tls_model
8722 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8724 if (GET_CODE (op) != SYMBOL_REF)
8725 return TLS_MODEL_NONE;
8726 return SYMBOL_REF_TLS_MODEL (op);
8729 /* Return the destination address of a branch. */
8730 static int
8731 branch_dest (rtx branch)
8733 rtx dest = SET_SRC (PATTERN (branch));
8735 if (GET_CODE (dest) == IF_THEN_ELSE)
8736 dest = XEXP (dest, 1);
8738 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8741 /* Return nonzero if REG is not used after INSN.
8742 We assume REG is a reload reg, and therefore does
8743 not live past labels. It may live past calls or jumps though. */
8744 bool
8745 reg_unused_after (rtx reg, rtx_insn *insn)
8747 /* If the reg is set by this instruction, then it is safe for our
8748 case. Disregard the case where this is a store to memory, since
8749 we are checking a register used in the store address. */
8750 rtx set = single_set (insn);
8751 if (set && !MEM_P (SET_DEST (set))
8752 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8753 return true;
8755 while ((insn = NEXT_INSN (insn)))
8757 if (!INSN_P (insn))
8758 continue;
8760 rtx_code code = GET_CODE (insn);
8762 #if 0
8763 /* If this is a label that existed before reload, then the register
8764 is dead here. However, if this is a label added by reorg, then
8765 the register may still be live here. We can't tell the difference,
8766 so we just ignore labels completely. */
8767 if (code == CODE_LABEL)
8768 return 1;
8769 /* else */
8770 #endif
8772 if (code == JUMP_INSN)
8773 return false;
8775 /* If this is a sequence, we must handle them all at once.
8776 We could have for instance a call that sets the target register,
8777 and an insn in a delay slot that uses the register. In this case,
8778 we must return 0. */
8779 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8781 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8782 bool retval = false;
8784 for (int i = 0; i < seq->len (); i++)
8786 rtx_insn *this_insn = seq->insn (i);
8787 rtx set = single_set (this_insn);
8789 if (CALL_P (this_insn))
8790 code = CALL_INSN;
8791 else if (JUMP_P (this_insn))
8793 if (INSN_ANNULLED_BRANCH_P (this_insn))
8794 return false;
8795 code = JUMP_INSN;
8798 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8799 return false;
8800 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8802 if (!MEM_P (SET_DEST (set)))
8803 retval = true;
8804 else
8805 return false;
8807 if (set == NULL_RTX
8808 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8809 return false;
8811 if (retval)
8812 return true;
8813 else if (code == JUMP_INSN)
8814 return false;
8817 rtx set = single_set (insn);
8818 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8819 return false;
8820 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8821 return !MEM_P (SET_DEST (set));
8822 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8823 return false;
8825 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8826 return true;
8828 return true;
8832 static GTY(()) rtx t_reg_rtx;
8834 get_t_reg_rtx (void)
8836 if (! t_reg_rtx)
8837 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8838 return t_reg_rtx;
8841 static GTY(()) tree fpscr_values;
8843 static void
8844 emit_fpu_switch (rtx scratch, int index)
8846 if (fpscr_values == NULL)
8848 tree t = build_index_type (integer_one_node);
8849 t = build_array_type (integer_type_node, t);
8850 t = build_decl (BUILTINS_LOCATION,
8851 VAR_DECL, get_identifier ("__fpscr_values"), t);
8852 DECL_ARTIFICIAL (t) = 1;
8853 DECL_IGNORED_P (t) = 1;
8854 DECL_EXTERNAL (t) = 1;
8855 TREE_STATIC (t) = 1;
8856 TREE_PUBLIC (t) = 1;
8857 TREE_USED (t) = 1;
8859 fpscr_values = t;
8862 rtx src = DECL_RTL (fpscr_values);
8863 if (!can_create_pseudo_p ())
8865 emit_move_insn (scratch, XEXP (src, 0));
8866 if (index != 0)
8867 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8868 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8870 else
8871 src = adjust_address (src, SImode, index * 4);
8873 emit_insn (gen_lds_fpscr (src));
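/* Illustration: the reference built above behaves as if the declaration
     extern int __fpscr_values[2];
   were visible, with FPSCR loaded from __fpscr_values[index]; the array
   itself is expected to be provided by the runtime library rather than by
   this file.  */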
8876 static rtx get_free_reg (HARD_REG_SET);
8878 /* This function returns a register to use to load the address to load
8879 the fpscr from. Currently it always returns r1 or r7, but when we are
8880 able to use pseudo registers after combine, or have a better mechanism
8881 for choosing a register, it should be done here. */
8882 /* REGS_LIVE is the liveness information for the point for which we
8883 need this allocation. In some bare-bones exit blocks, r1 is live at the
8884 start. We can even have all of r0..r3 being live:
8885 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8886 The insn before which the new insns are placed will clobber the register
8887 we return. If a basic block consists only of setting the return value
8888 register to a pseudo and using that register, the return value is not
8889 live before or after this block, yet we'll insert our insns right in
8890 the middle. */
8891 static rtx
8892 get_free_reg (HARD_REG_SET regs_live)
8894 if (! TEST_HARD_REG_BIT (regs_live, 1))
8895 return gen_rtx_REG (Pmode, 1);
8897 /* Hard reg 1 is live; since this is a small register classes target,
8898 there shouldn't be anything but a jump before the function end. */
8899 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8900 return gen_rtx_REG (Pmode, 7);
8903 /* This function will set the fpscr from memory.
8904 MODE is the mode we are setting it to. */
8905 void
8906 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8908 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8909 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8911 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8912 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8915 /* Is the given character a logical line separator for the assembler? */
8916 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8917 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8918 #endif
8920 static bool
8921 sequence_insn_p (rtx_insn *insn)
8923 rtx_insn* prev = PREV_INSN (insn);
8924 if (prev == NULL)
8925 return false;
8927 rtx_insn* next = NEXT_INSN (prev);
8928 if (next == NULL)
8929 return false;
8931 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8935 sh_insn_length_adjustment (rtx_insn *insn)
8937 /* Instructions with unfilled delay slots take up an extra two bytes for
8938 the nop in the delay slot. */
8939 if (((NONJUMP_INSN_P (insn)
8940 && GET_CODE (PATTERN (insn)) != USE
8941 && GET_CODE (PATTERN (insn)) != CLOBBER)
8942 || CALL_P (insn) || JUMP_P (insn))
8943 && ! sequence_insn_p (insn)
8944 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8945 return 2;
8947 /* Increase the insn length of a cbranch without a delay slot insn to
8948 force a delay slot which will be stuffed with a nop. */
8949 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8950 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8951 && ! sequence_insn_p (insn))
8952 return 2;
8954 /* sh-dsp parallel processing insn take four bytes instead of two. */
8956 if (NONJUMP_INSN_P (insn))
8958 int sum = 0;
8959 rtx body = PATTERN (insn);
8960 const char *templ;
8961 char c;
8962 bool maybe_label = true;
8964 if (GET_CODE (body) == ASM_INPUT)
8965 templ = XSTR (body, 0);
8966 else if (asm_noperands (body) >= 0)
8967 templ
8968 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8969 else
8970 return 0;
8973 int ppi_adjust = 0;
8976 c = *templ++;
8977 while (c == ' ' || c == '\t');
8978 /* all sh-dsp parallel-processing insns start with p.
8979 The only non-ppi sh insn starting with p is pref.
8980 The only ppi starting with pr is prnd. */
8981 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8982 ppi_adjust = 2;
8983 /* The repeat pseudo-insn expands into three insns, a total of
8984 six bytes in size. */
8985 else if ((c == 'r' || c == 'R')
8986 && ! strncasecmp ("epeat", templ, 5))
8987 ppi_adjust = 4;
8988 while (c && c != '\n'
8989 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8991 /* If this is a label, it is obviously not a ppi insn. */
8992 if (c == ':' && maybe_label)
8994 ppi_adjust = 0;
8995 break;
8997 else if (c == '\'' || c == '"')
8998 maybe_label = false;
8999 c = *templ++;
9001 sum += ppi_adjust;
9002 maybe_label = c != ':';
9004 while (c);
9005 return sum;
9007 return 0;
9010 /* Return TRUE for a valid displacement for the REG+disp addressing
9011 with MODE. */
9012 bool
9013 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
9014 bool allow_zero)
9016 if (! CONST_INT_P (op))
9017 return false;
9020 const HOST_WIDE_INT offset = INTVAL (op);
9021 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9022 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9024 /* If the mode does not support any displacement always return false.
9025 Even though an index of '0' is actually always valid, it will cause
9026 troubles when e.g. a DFmode move is split into two SFmode moves,
9027 where one SFmode move will have index '0' and the other move will
9028 have index '4'. */
9029 if (!allow_zero && max_disp < 1)
9030 return false;
9032 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9036 /* Recognize an RTL expression that is a valid memory address for
9037 an instruction.
9038 The MODE argument is the machine mode for the MEM expression
9039 that wants to use this address.
9040 Allow REG
9041 REG+disp
9042 REG+r0
9043 REG++
9044 --REG
9046 GBR+disp */
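/* In SH assembler syntax the accepted forms look like (illustrative
   operands only):
     @r1          REG
     @(8,r1)      REG+disp
     @(r0,r1)     REG+r0
     @r1+         REG++ (post-increment)
     @-r1         --REG (pre-decrement)
     @(4,gbr)     GBR+disp  */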
9047 static bool
9048 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9050 if (REG_P (x) && REGNO (x) == GBR_REG)
9051 return true;
9053 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9054 return true;
9055 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9056 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9057 return true;
9058 else if (GET_CODE (x) == PLUS)
9060 rtx xop0 = XEXP (x, 0);
9061 rtx xop1 = XEXP (x, 1);
9063 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9064 return gbr_displacement (xop1, mode);
9066 if (GET_MODE_SIZE (mode) <= 8
9067 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9068 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9069 return true;
9071 if (GET_MODE_SIZE (mode) <= 4
9072 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9074 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9075 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9076 return true;
9077 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9078 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9079 return true;
9083 return false;
9086 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9087 isn't protected by a PIC unspec. */
9088 bool
9089 nonpic_symbol_mentioned_p (rtx x)
9091 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9092 || GET_CODE (x) == PC)
9093 return true;
9095 /* We don't want to look into the possible MEM location of a
9096 CONST_DOUBLE, since we're not going to use it, in general. */
9097 if (GET_CODE (x) == CONST_DOUBLE)
9098 return false;
9100 if (GET_CODE (x) == UNSPEC
9101 && (XINT (x, 1) == UNSPEC_PIC
9102 || XINT (x, 1) == UNSPEC_GOT
9103 || XINT (x, 1) == UNSPEC_GOTOFF
9104 || XINT (x, 1) == UNSPEC_GOTPLT
9105 || XINT (x, 1) == UNSPEC_GOTTPOFF
9106 || XINT (x, 1) == UNSPEC_DTPOFF
9107 || XINT (x, 1) == UNSPEC_TPOFF
9108 || XINT (x, 1) == UNSPEC_PLT
9109 || XINT (x, 1) == UNSPEC_PCREL
9110 || XINT (x, 1) == UNSPEC_SYMOFF
9111 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9112 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9113 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9114 return false;
9116 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9117 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9119 if (fmt[i] == 'E')
9121 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9122 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9123 return true;
9125 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9126 return true;
9129 return false;
9132 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9133 @GOTOFF in `reg'. */
9135 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9137 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9138 return orig;
9140 if (GET_CODE (orig) == LABEL_REF
9141 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9143 if (reg == NULL_RTX)
9144 reg = gen_reg_rtx (Pmode);
9146 if (TARGET_FDPIC
9147 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9149 /* Weak functions may be NULL which doesn't work with
9150 GOTOFFFUNCDESC because the runtime offset is not known. */
9151 if (SYMBOL_REF_WEAK (orig))
9152 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9153 else
9154 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9156 else if (TARGET_FDPIC
9157 && (GET_CODE (orig) == LABEL_REF
9158 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9159 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9160 || SYMBOL_REF_EXTERNAL_P (orig)
9161 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
9162 /* In FDPIC, GOTOFF can only be used for writable data. */
9163 emit_insn (gen_symGOT2reg (reg, orig));
9164 else
9165 emit_insn (gen_symGOTOFF2reg (reg, orig));
9166 return reg;
9168 else if (GET_CODE (orig) == SYMBOL_REF)
9170 if (reg == NULL_RTX)
9171 reg = gen_reg_rtx (Pmode);
9173 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9174 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9175 else
9176 emit_insn (gen_symGOT2reg (reg, orig));
9177 return reg;
9179 return orig;
9182 /* Given a (logical) mode size and an offset in bytes, try to find the
9183 appropriate displacement value for a mov insn. On SH the displacements
9184 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9185 15 bytes in QImode. To compensate for this we create a new base address by
9186 adding an adjustment value to it.
9188 If the originally requested offset is greater than 127 we prefer using
9189 values 124..127 over 128..131 to increase opportunities to use the
9190 add #imm, Rn insn.
9192 In some cases it is possible that a requested offset might seem unaligned
9193 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9194 This is compensated by adjusting the base address so that the effective
9195 address of the displacement move insn will be aligned.
9197 This is not the best possible way of rebasing the base address, as it
9198 does not look at other present displacement addressings around it.
9199 In some cases this can create more base address adjustments than would
9200 actually be necessary. */
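/* Worked example (illustrative numbers, following the comment above): for
   an SImode access the mov.l insn encodes displacements 0..60, so a
   requested offset of 68 yields offset_adjust == 64 (added to the base
   register up front) and mov_disp == 4 (encoded in the move itself),
   giving the same effective address of base + 68.  */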
9201 struct disp_adjust
9203 rtx offset_adjust;
9204 rtx mov_disp;
9207 static struct disp_adjust
9208 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9210 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9212 /* Do not try to use SH2A's large displacements here, because this would
9213 effectively disable the small displacement insns. */
9214 const int mode_sz = GET_MODE_SIZE (mode);
9215 const int mov_insn_sz = mov_insn_size (mode, false);
9216 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9217 const int max_disp_next = max_disp + mov_insn_sz;
9218 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9219 HOST_WIDE_INT offset_adjust;
9221 /* In some cases this actually does happen and we must check for it. */
9222 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9223 return res;
9225 /* Keeps the previous behavior for QImode displacement addressing.
9226 This just decides how the offset is re-based. Removing this special
9227 case will result in slightly bigger code on average, but it's not that
9228 bad actually. */
9229 if (mov_insn_sz == 1)
9230 align_modifier = 0;
9232 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9234 if (mode_sz + offset - offset_adjust <= max_disp_next)
9236 res.offset_adjust = GEN_INT (offset_adjust);
9237 res.mov_disp = GEN_INT (offset - offset_adjust);
9240 return res;
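/* Worked example (illustrative, assuming SImode, for which mov_insn_size
   gives 4 and sh_max_mov_insn_displacement gives 60, per the comment above):

     offset = 68:   align_modifier = 0
                    offset_adjust = (68 & ~60) = 64,  mov_disp = 4
                    -> add #64 to the base reg, then use  mov.l @(4,Rn).

     offset = 130:  align_modifier = 4 (offset > 127)
                    offset_adjust = ((130 + 4) & ~60) - 4 = 126,  mov_disp = 4
                    -> 126 still fits the signed 8-bit immediate of
                       add #imm,Rn, whereas re-basing by 128..131 would not.  */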
9243 /* Try to modify an illegitimate address and make it legitimate.
9244 If we find one, return the new, valid address.
9245 Otherwise, return the original address. */
9246 static rtx
9247 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9249 if (flag_pic)
9250 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9252 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9253 || (TARGET_SH2E && mode == SFmode))
9254 return x;
9256 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9257 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9259 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9260 INTVAL (XEXP (x, 1)));
9262 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9264 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9265 adj.offset_adjust, NULL_RTX, 0,
9266 OPTAB_LIB_WIDEN);
9267 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9270 return x;
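/* Continuing the SImode example above: an address of the form
   (plus (reg Rb) (const_int 130)) is rewritten roughly as

     (set (reg Rt) (plus (reg Rb) (const_int 126)))   ; emitted via expand_binop
     (plus (reg Rt) (const_int 4))                    ; returned address

   where Rt is a fresh pseudo, so the move insn can use its short
   displacement field.  */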
9273 /* Attempt to replace *p, which is an address that needs reloading, with
9274 a valid memory address for an operand of mode MODE.
9275 Like for sh_legitimize_address, for the SH we try to get a normal form
9276 of the address. That will allow inheritance of the address reloads. */
9277 bool
9278 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9279 int itype)
9281 enum reload_type type = (enum reload_type) itype;
9282 const int mode_sz = GET_MODE_SIZE (mode);
9284 if (sh_lra_p ())
9285 return false;
9287 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9288 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9290 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9291 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9293 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9295 push_reload (*p, NULL_RTX, p, NULL,
9296 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9297 return true;
9300 if (TARGET_SH2E && mode == SFmode)
9302 *p = copy_rtx (*p);
9303 push_reload (*p, NULL_RTX, p, NULL,
9304 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9305 return true;
9308 /* FIXME: Do not allow to legitimize QImode and HImode displacement
9309 moves because then reload has a problem figuring the constraint
9310 that the move insn target/source reg must be R0.
9311 Or maybe some handling is wrong in sh_secondary_reload for this
9312 to work properly? */
9313 if ((mode_sz == 4 || mode_sz == 8)
9314 && ! (TARGET_SH4 && mode == DFmode)
9315 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9317 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9318 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9319 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9320 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9321 return true;
9325 /* We must re-recognize what we created before. */
9326 if (GET_CODE (*p) == PLUS
9327 && (mode_sz == 4 || mode_sz == 8)
9328 && GET_CODE (XEXP (*p, 0)) == PLUS
9329 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9330 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9331 && CONST_INT_P (XEXP (*p, 1))
9332 && ! (TARGET_SH2E && mode == SFmode))
9334 /* Because this address is so complex, we know it must have
9335 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9336 it is already unshared, and needs no further unsharing. */
9337 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9338 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9339 return true;
9342 return false;
9345 /* In the name of slightly smaller debug output, and to cater to
9346 general assembler lossage, recognize various UNSPEC sequences
9347 and turn them back into a direct symbol reference. */
9348 static rtx
9349 sh_delegitimize_address (rtx orig_x)
9351 orig_x = delegitimize_mem_from_attrs (orig_x);
9353 rtx x = orig_x;
9354 if (MEM_P (x))
9355 x = XEXP (x, 0);
9356 if (GET_CODE (x) == CONST)
9358 rtx y = XEXP (x, 0);
9359 if (GET_CODE (y) == UNSPEC)
9361 if (XINT (y, 1) == UNSPEC_GOT
9362 || XINT (y, 1) == UNSPEC_GOTOFF
9363 || XINT (y, 1) == UNSPEC_SYMOFF)
9364 return XVECEXP (y, 0, 0);
9365 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9367 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9369 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9371 if (GET_CODE (symplt) == UNSPEC
9372 && (XINT (symplt, 1) == UNSPEC_PLT
9373 || XINT (symplt, 1) == UNSPEC_PCREL))
9374 return XVECEXP (symplt, 0, 0);
9380 return orig_x;
9383 /* Mark the use of a constant in the literal table. If the constant
9384 has multiple labels, make it unique. */
9385 static rtx
9386 mark_constant_pool_use (rtx x)
9388 if (x == NULL_RTX)
9389 return x;
9391 switch (GET_CODE (x))
9393 case LABEL_REF:
9394 x = XEXP (x, 0);
9395 case CODE_LABEL:
9396 break;
9397 default:
9398 return x;
9401 /* Get the first label in the list of labels for the same constant
9402 and delete the other labels in the list. */
9403 rtx_insn* lab = as_a <rtx_insn*> (x);
9404 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9406 if (!LABEL_P (insn)
9407 || LABEL_REFS (insn) != NEXT_INSN (insn))
9408 break;
9409 lab = insn;
9412 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9413 as_a<rtx_insn *> (insn)->set_deleted ();
9415 /* Mark constants in a window. */
9416 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9417 insn = NEXT_INSN (insn))
9419 if (!NONJUMP_INSN_P (insn))
9420 continue;
9422 rtx pattern = PATTERN (insn);
9423 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9424 continue;
9426 switch (XINT (pattern, 1))
9428 case UNSPECV_CONST2:
9429 case UNSPECV_CONST4:
9430 case UNSPECV_CONST8:
9431 XVECEXP (pattern, 0, 1) = const1_rtx;
9432 break;
9433 case UNSPECV_WINDOW_END:
9434 if (XVECEXP (pattern, 0, 0) == x)
9435 return lab;
9436 break;
9437 case UNSPECV_CONST_END:
9438 return lab;
9439 default:
9440 break;
9444 return lab;
9447 /* Return true if it's possible to redirect BRANCH1 to the destination
9448 of an unconditional jump BRANCH2. We only want to do this if the
9449 resulting branch will have a short displacement. */
9450 static bool
9451 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9453 /* Don't follow if BRANCH2 might be a jump crossing between
9454 hot and cold partitions. */
9455 if (flag_reorder_blocks_and_partition
9456 && simplejump_p (branch2)
9457 && CROSSING_JUMP_P (branch2))
9458 return false;
9460 if (flag_expensive_optimizations && simplejump_p (branch2))
9462 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9463 rtx_insn *insn;
9464 int distance;
9466 for (distance = 0, insn = NEXT_INSN (branch1);
9467 insn && distance < 256;
9468 insn = PREV_INSN (insn))
9470 if (insn == dest)
9471 return true;
9472 else
9473 distance += get_attr_length (insn);
9475 for (distance = 0, insn = NEXT_INSN (branch1);
9476 insn && distance < 256;
9477 insn = NEXT_INSN (insn))
9479 if (insn == dest)
9480 return true;
9481 else
9482 distance += get_attr_length (insn);
9485 return false;
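/* In other words, the redirection is only performed when the target of
   BRANCH2 appears to lie within roughly 256 bytes of BRANCH1 in either
   direction, which keeps the rewritten conditional branch within the
   signed 8-bit (times 2) displacement range of the SH bt/bf insns.  */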
9488 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9489 bool
9490 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9491 unsigned int new_reg)
9493 /* Interrupt functions can only use registers that have already been
9494 saved by the prologue, even if they would normally be
9495 call-clobbered. */
9496 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9497 return false;
9499 return true;
9502 /* Function to update the integer COST
9503 based on the relationship between INSN that is dependent on
9504 DEP_INSN through the dependence LINK. The default is to make no
9505 adjustment to COST. This can be used for example to specify to
9506 the scheduler that an output- or anti-dependence does not incur
9507 the same cost as a data-dependence. The return value should be
9508 the new value for COST. */
9509 static int
9510 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9511 unsigned int)
9513 rtx reg, use_pat;
9515 if (dep_type == 0)
9517 if (recog_memoized (insn) < 0
9518 || recog_memoized (dep_insn) < 0)
9519 return cost;
9521 rtx dep_set = single_set (dep_insn);
9523 /* The latency that we specify in the scheduling description refers
9524 to the actual output, not to an auto-increment register; for that,
9525 the latency is one. */
9526 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9528 rtx set = single_set (insn);
9530 if (set
9531 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9532 && (!MEM_P (SET_DEST (set))
9533 || !reg_mentioned_p (SET_DEST (dep_set),
9534 XEXP (SET_DEST (set), 0))))
9535 cost = 1;
9537 /* The only input for a call that is timing-critical is the
9538 function's address. */
9539 if (CALL_P (insn))
9541 rtx call = get_call_rtx_from (insn);
9542 if (call
9543 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9544 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9545 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9546 cost -= TARGET_SH4_300 ? 3 : 6;
9548 /* Likewise, the most timing critical input for an sfunc call
9549 is the function address. However, sfuncs typically start
9550 using their arguments pretty quickly.
9551 Assume a four cycle delay for SH4 before they are needed.
9552 Cached ST40-300 calls are quicker, so assume only a one
9553 cycle delay there.
9554 ??? Maybe we should encode the delays till input registers
9555 are needed by sfuncs into the sfunc call insn. */
9556 /* All sfunc calls are parallels with at least four components.
9557 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9558 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9559 && XVECLEN (PATTERN (insn), 0) >= 4
9560 && (reg = sfunc_uses_reg (insn)))
9562 if (! reg_set_p (reg, dep_insn))
9563 cost -= TARGET_SH4_300 ? 1 : 4;
9565 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9567 attr_type dep_type = get_attr_type (dep_insn);
9568 attr_type type;
9569 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9570 cost--;
9571 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9572 && (type = get_attr_type (insn)) != TYPE_CALL
9573 && type != TYPE_SFUNC)
9574 cost--;
9575 /* When the preceding instruction loads the shift amount of
9576 the following SHAD/SHLD, the latency of the load is increased
9577 by 1 cycle. */
9578 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9579 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9580 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9581 XEXP (SET_SRC (single_set (insn)),
9582 1)))
9583 cost++;
9584 /* When an LS group instruction with a latency of less than
9585 3 cycles is followed by a double-precision floating-point
9586 instruction, FIPR, or FTRV, the latency of the first
9587 instruction is increased to 3 cycles. */
9588 else if (cost < 3
9589 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9590 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9591 cost = 3;
9592 /* The lsw register of a double-precision computation is ready one
9593 cycle earlier. */
9594 else if (reload_completed
9595 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9596 && (use_pat = single_set (insn))
9597 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9598 SET_SRC (use_pat)))
9599 cost -= 1;
9601 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9602 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9603 cost -= 1;
9605 else if (TARGET_SH4_300)
9607 /* Stores need their input register two cycles later. */
9608 attr_type type;
9609 if (dep_set && cost >= 1
9610 && ((type = get_attr_type (insn)) == TYPE_STORE
9611 || type == TYPE_PSTORE
9612 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9614 rtx set = single_set (insn);
9616 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9617 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9619 cost -= 2;
9620 /* But don't reduce the cost below 1 if the address depends
9621 on a side effect of dep_insn. */
9622 if (cost < 1
9623 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9624 cost = 1;
9629 /* An anti-dependence penalty of two applies if the first insn is a double
9630 precision fadd / fsub / fmul. */
9631 else if (!TARGET_SH4_300
9632 && dep_type == REG_DEP_ANTI
9633 && recog_memoized (dep_insn) >= 0
9634 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9635 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9636 /* A lot of alleged anti-flow dependences are fake,
9637 so check this one is real. */
9638 && flow_dependent_p (dep_insn, insn))
9639 cost = 2;
9641 return cost;
9644 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9645 if DEP_INSN is anti-flow dependent on INSN. */
9646 static bool
9647 flow_dependent_p (rtx insn, rtx dep_insn)
9649 rtx tmp = PATTERN (insn);
9651 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9652 return tmp == NULL_RTX;
9655 /* A helper function for flow_dependent_p called through note_stores. */
9656 static void
9657 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9659 rtx * pinsn = (rtx *) data;
9661 if (*pinsn && reg_referenced_p (x, *pinsn))
9662 *pinsn = NULL_RTX;
9665 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9666 'special function' patterns (type sfunc) that clobber pr, but that
9667 do not look like function calls to leaf_function_p. Hence we must
9668 do this extra check. */
9669 static int
9670 sh_pr_n_sets (void)
9672 return DF_REG_DEF_COUNT (PR_REG);
9675 /* Return where to allocate pseudo for a given hard register initial
9676 value. */
9677 static rtx
9678 sh_allocate_initial_value (rtx hard_reg)
9680 if (REGNO (hard_reg) == PR_REG)
9682 if (crtl->is_leaf && ! sh_pr_n_sets ())
9683 return hard_reg;
9684 else
9685 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9688 return NULL_RTX;
9691 /* This function returns "2" to indicate dual issue for the SH4
9692 processor. To be used by the DFA pipeline description. */
9693 static int
9694 sh_issue_rate (void)
9696 if (TARGET_SUPERSCALAR)
9697 return 2;
9698 else
9699 return 1;
9702 /* Functions for ready queue reordering for sched1. */
9704 /* Get weight for mode for a set x. */
9705 static short
9706 find_set_regmode_weight (rtx x, machine_mode mode)
9708 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9709 return 1;
9710 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9712 if (REG_P (SET_DEST (x)))
9714 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9715 return 1;
9716 else
9717 return 0;
9719 return 1;
9721 return 0;
9724 /* Get regmode weight for insn. */
9725 static short
9726 find_insn_regmode_weight (rtx insn, machine_mode mode)
9728 /* Increment weight for each register born here. */
9729 rtx x = PATTERN (insn);
9730 short reg_weight = find_set_regmode_weight (x, mode);
9731 if (GET_CODE (x) == PARALLEL)
9733 int j;
9734 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9736 x = XVECEXP (PATTERN (insn), 0, j);
9737 reg_weight += find_set_regmode_weight (x, mode);
9740 /* Decrement weight for each register that dies here. */
9741 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9743 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9745 rtx note = XEXP (x, 0);
9746 if (REG_P (note) && GET_MODE (note) == mode)
9747 reg_weight--;
9750 return reg_weight;
9753 /* Calculate regmode weights for all insns of a basic block. */
9754 static void
9755 find_regmode_weight (basic_block b, machine_mode mode)
9757 rtx_insn *insn, *next_tail, *head, *tail;
9759 get_ebb_head_tail (b, b, &head, &tail);
9760 next_tail = NEXT_INSN (tail);
9762 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9764 /* Handle register life information. */
9765 if (!INSN_P (insn))
9766 continue;
9768 if (mode == SFmode)
9769 INSN_REGMODE_WEIGHT (insn, mode) =
9770 find_insn_regmode_weight (insn, mode)
9771 + 2 * find_insn_regmode_weight (insn, DFmode);
9772 else if (mode == SImode)
9773 INSN_REGMODE_WEIGHT (insn, mode) =
9774 find_insn_regmode_weight (insn, mode)
9775 + 2 * find_insn_regmode_weight (insn, DImode);
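/* For instance, an insn that sets a DFmode register is counted with an
   SFmode weight of 2 (it occupies a register pair), and likewise a DImode
   set contributes 2 to the SImode weight, per the scaling above.  */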
9779 /* Comparison function for ready queue sorting. */
9780 static int
9781 rank_for_reorder (const void *x, const void *y)
9783 rtx_insn *tmp = *(rtx_insn * const *) y;
9784 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9786 /* The insn in a schedule group should be issued first. */
9787 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9788 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9790 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9791 minimizes instruction movement, thus minimizing sched's effect on
9792 register pressure. */
9793 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9796 /* Resort the array A in which only element at index N may be out of order. */
9797 static void
9798 swap_reorder (rtx_insn **a, int n)
9800 rtx_insn *insn = a[n - 1];
9801 int i = n - 2;
9803 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9805 a[i + 1] = a[i];
9806 i -= 1;
9808 a[i + 1] = insn;
9811 /* Sort the ready list by ascending priority. */
9812 static void
9813 ready_reorder (rtx_insn **ready, int nready)
9815 if (nready == 2)
9816 swap_reorder (ready, nready);
9817 else if (nready > 2)
9818 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9821 /* Count life regions of r0 for a block. */
9822 static int
9823 find_r0_life_regions (basic_block b)
9825 bool live;
9826 int set;
9827 int death = 0;
9829 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9831 set = 1;
9832 live = true;
9834 else
9836 set = 0;
9837 live = false;
9840 rtx_insn* insn = BB_HEAD (b);
9841 rtx_insn* end = BB_END (b);
9842 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9843 while (1)
9845 if (INSN_P (insn))
9847 if (find_regno_note (insn, REG_DEAD, R0_REG))
9849 death++;
9850 live = false;
9853 rtx pset;
9854 if (!live
9855 && (pset = single_set (insn))
9856 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9857 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9859 set++;
9860 live = true;
9863 if (insn == end)
9864 break;
9865 insn = NEXT_INSN (insn);
9867 return set - death;
9870 /* Calculate regmode weights for all insns of all basic blocks. */
9871 static void
9872 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9873 int verbose ATTRIBUTE_UNUSED,
9874 int old_max_uid)
9876 basic_block b;
9878 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9879 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9880 r0_life_regions = 0;
9882 FOR_EACH_BB_REVERSE_FN (b, cfun)
9884 find_regmode_weight (b, SImode);
9885 find_regmode_weight (b, SFmode);
9886 if (!reload_completed)
9887 r0_life_regions += find_r0_life_regions (b);
9890 CURR_REGMODE_PRESSURE (SImode) = 0;
9891 CURR_REGMODE_PRESSURE (SFmode) = 0;
9894 /* Cleanup. */
9895 static void
9896 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9897 int verbose ATTRIBUTE_UNUSED)
9899 if (regmode_weight[0])
9901 free (regmode_weight[0]);
9902 regmode_weight[0] = NULL;
9904 if (regmode_weight[1])
9906 free (regmode_weight[1]);
9907 regmode_weight[1] = NULL;
9911 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9912 keep count of register pressures on SImode and SFmode. */
9913 static int
9914 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9915 int sched_verbose ATTRIBUTE_UNUSED,
9916 rtx_insn *insn,
9917 int can_issue_more)
9919 if (GET_CODE (PATTERN (insn)) != USE
9920 && GET_CODE (PATTERN (insn)) != CLOBBER)
9921 cached_can_issue_more = can_issue_more - 1;
9922 else
9923 cached_can_issue_more = can_issue_more;
9925 if (reload_completed)
9926 return cached_can_issue_more;
9928 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9929 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9931 return cached_can_issue_more;
9934 static void
9935 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9936 int verbose ATTRIBUTE_UNUSED,
9937 int veclen ATTRIBUTE_UNUSED)
9939 CURR_REGMODE_PRESSURE (SImode) = 0;
9940 CURR_REGMODE_PRESSURE (SFmode) = 0;
9943 /* Some magic numbers. */
9944 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9945 functions that already have high pressure on r0. */
9946 #define R0_MAX_LIFE_REGIONS 2
9947 /* Register Pressure thresholds for SImode and SFmode registers. */
9948 #define SIMODE_MAX_WEIGHT 5
9949 #define SFMODE_MAX_WEIGHT 10
9951 /* Return true if the pressure is high for MODE. */
9952 static bool
9953 high_pressure (machine_mode mode)
9955 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9956 functions that already have high pressure on r0. */
9957 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9958 return true;
9960 if (mode == SFmode)
9961 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9962 else
9963 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9966 /* Reorder ready queue if register pressure is high. */
9967 static int
9968 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9969 int sched_verbose ATTRIBUTE_UNUSED,
9970 rtx_insn **ready,
9971 int *n_readyp,
9972 int clock_var ATTRIBUTE_UNUSED)
9974 if (reload_completed)
9975 return sh_issue_rate ();
9977 if (high_pressure (SFmode) || high_pressure (SImode))
9979 ready_reorder (ready, *n_readyp);
9982 return sh_issue_rate ();
9985 /* Skip cycles if the current register pressure is high. */
9986 static int
9987 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9988 int sched_verbose ATTRIBUTE_UNUSED,
9989 rtx_insn **ready ATTRIBUTE_UNUSED,
9990 int *n_readyp ATTRIBUTE_UNUSED,
9991 int clock_var ATTRIBUTE_UNUSED)
9993 if (reload_completed)
9994 return cached_can_issue_more;
9996 if (high_pressure(SFmode) || high_pressure (SImode))
9997 skip_cycles = 1;
9999 return cached_can_issue_more;
10002 /* Skip cycles without sorting the ready queue. This will move insns from
10003 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
10004 queue by sh_reorder. */
10006 /* Generally, skipping this many cycles is sufficient for all insns to move
10007 from Q -> R. */
10008 #define MAX_SKIPS 8
10010 static int
10011 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10012 int sched_verbose ATTRIBUTE_UNUSED,
10013 rtx_insn *insn ATTRIBUTE_UNUSED,
10014 int last_clock_var,
10015 int clock_var,
10016 int *sort_p)
10018 if (reload_completed)
10019 return 0;
10021 if (skip_cycles)
10023 if ((clock_var - last_clock_var) < MAX_SKIPS)
10025 *sort_p = 0;
10026 return 1;
10028 /* If this is the last cycle we are skipping, allow reordering of R. */
10029 if ((clock_var - last_clock_var) == MAX_SKIPS)
10031 *sort_p = 1;
10032 return 1;
10036 skip_cycles = 0;
10038 return 0;
10041 static bool
10042 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10044 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10048 /* On the SH1..SH4, the trampoline looks like
10049 2 0002 D202 mov.l l2,r2
10050 1 0000 D301 mov.l l1,r3
10051 3 0004 422B jmp @r2
10052 4 0006 0009 nop
10053 5 0008 00000000 l1: .long area
10054 6 000c 00000000 l2: .long function
10056 FDPIC needs a form that includes a function descriptor and
10057 code to load the GOT register:
10058 0 0000 00000000 .long l0
10059 1 0004 00000000 .long gotval
10060 2 0008 D302 l0: mov.l l1,r3
10061 3 000a D203 mov.l l2,r2
10062 4 000c 6122 mov.l @r2,r1
10063 5 000e 5C21 mov.l @(4,r2),r12
10064 6 0010 412B jmp @r1
10065 7 0012 0009 nop
10066 8 0014 00000000 l1: .long area
10067 9 0018 00000000 l2: .long function
10069 SH5 (compact) uses r1 instead of r3 for the static chain. */
10071 /* Emit insns to store a value at memory address + offset. */
10072 static void
10073 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10075 gcc_assert ((offset & 3) == 0);
10076 emit_move_insn (offset == 0
10077 ? change_address (addr, SImode, NULL_RTX)
10078 : adjust_address (addr, SImode, offset), value);
10081 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10082 static void
10083 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10085 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10086 ? (w0 | (w1 << 16))
10087 : (w1 | (w0 << 16)), SImode));
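/* For illustration: the first non-FDPIC trampoline word emitted below,

     sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);

   places 0xd202 (mov.l l2,r2 in the layout comment above) at offset 0 and
   0xd301 (mov.l l1,r3) at offset 2 on either endianness; the packed SImode
   constant is 0xd301d202 for little endian and 0xd202d301 for big endian.  */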
10090 /* Emit RTL insns to initialize the variable parts of a trampoline.
10091 FNADDR is an RTX for the address of the function's pure code.
10092 CXT is an RTX for the static chain value for the function. */
10093 static void
10094 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10096 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10097 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10099 if (TARGET_FDPIC)
10101 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10103 sh_emit_storesi (tramp_mem, 0, a);
10104 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10106 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10107 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10108 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10110 sh_emit_storesi (tramp_mem, 20, cxt);
10111 sh_emit_storesi (tramp_mem, 24, fnaddr);
10113 else
10115 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10116 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10118 sh_emit_storesi (tramp_mem, 8, cxt);
10119 sh_emit_storesi (tramp_mem, 12, fnaddr);
10121 if (TARGET_HARD_SH4)
10123 if (!TARGET_INLINE_IC_INVALIDATE
10124 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10125 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10126 FUNCTION_ORDINARY).sym,
10127 LCT_NORMAL, VOIDmode, tramp, SImode);
10128 else
10129 emit_insn (gen_ic_invalidate_line (tramp));
10133 /* On SH5, trampolines were SHmedia code, so 1 had to be added to the
address; no adjustment is needed here. */
10134 static rtx
10135 sh_trampoline_adjust_address (rtx tramp)
10137 return tramp;
10140 /* If PIC, we cannot make sibling calls to global functions
10141 because the PLT requires r12 to be live. */
10142 static bool
10143 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10145 return (1
10146 && ! sh_cfun_interrupt_handler_p ()
10147 && (! flag_pic || TARGET_FDPIC
10148 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10149 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10152 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10153 void
10154 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10156 const_tree decl = SYMBOL_REF_DECL (sym);
10157 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10159 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10160 emit_insn (gen_sym_label2reg (reg, sym, lab));
10161 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10162 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10163 else
10164 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10167 /* Machine specific built-in functions. */
10169 struct builtin_description
10171 bool (* const is_enabled) (void);
10172 const enum insn_code icode;
10173 const char *const name;
10174 int signature;
10175 tree fndecl;
10178 /* Built-in enable predicate: returns true when the SH1 built-ins below
10179 are available for the current target. */
10180 static bool
10181 sh1_builtin_p (void)
10183 return TARGET_SH1;
10186 /* Describe number and signedness of arguments; arg[0] == result
10187 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10188 /* 9: 64-bit pointer, 10: 32-bit pointer */
10189 static const char signature_args[][4] =
10191 #define SH_BLTIN_V2SI2 0
10192 { 4, 4 },
10193 #define SH_BLTIN_V4HI2 1
10194 { 4, 4 },
10195 #define SH_BLTIN_V2SI3 2
10196 { 4, 4, 4 },
10197 #define SH_BLTIN_V4HI3 3
10198 { 4, 4, 4 },
10199 #define SH_BLTIN_V8QI3 4
10200 { 4, 4, 4 },
10201 #define SH_BLTIN_MAC_HISI 5
10202 { 1, 4, 4, 1 },
10203 #define SH_BLTIN_SH_HI 6
10204 { 4, 4, 1 },
10205 #define SH_BLTIN_SH_SI 7
10206 { 4, 4, 1 },
10207 #define SH_BLTIN_V4HI2V2SI 8
10208 { 4, 4, 4 },
10209 #define SH_BLTIN_V4HI2V8QI 9
10210 { 4, 4, 4 },
10211 #define SH_BLTIN_SISF 10
10212 { 4, 2 },
10213 #define SH_BLTIN_LDUA_L 11
10214 { 2, 10 },
10215 #define SH_BLTIN_LDUA_Q 12
10216 { 1, 10 },
10217 #define SH_BLTIN_STUA_L 13
10218 { 0, 10, 2 },
10219 #define SH_BLTIN_STUA_Q 14
10220 { 0, 10, 1 },
10221 #define SH_BLTIN_LDUA_L64 15
10222 { 2, 9 },
10223 #define SH_BLTIN_LDUA_Q64 16
10224 { 1, 9 },
10225 #define SH_BLTIN_STUA_L64 17
10226 { 0, 9, 2 },
10227 #define SH_BLTIN_STUA_Q64 18
10228 { 0, 9, 1 },
10229 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10230 #define SH_BLTIN_2 19
10231 #define SH_BLTIN_SU 19
10232 { 1, 2 },
10233 #define SH_BLTIN_3 20
10234 #define SH_BLTIN_SUS 20
10235 { 2, 2, 1 },
10236 #define SH_BLTIN_PSSV 21
10237 { 0, 8, 2, 2 },
10238 #define SH_BLTIN_XXUU 22
10239 #define SH_BLTIN_UUUU 22
10240 { 1, 1, 1, 1 },
10241 #define SH_BLTIN_PV 23
10242 { 0, 8 },
10243 #define SH_BLTIN_VP 24
10244 { 8, 0 },
10245 #define SH_BLTIN_UV 25
10246 { 1, 0 },
10247 #define SH_BLTIN_VU 26
10248 { 0, 1 },
10250 /* mcmv: operands considered unsigned. */
10251 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10252 /* mperm: control value considered unsigned int. */
10253 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10254 /* mshards_q: returns signed short. */
10255 /* nsb: takes long long arg, returns unsigned char. */
10256 static struct builtin_description bdesc[] =
10258 { sh1_builtin_p,
10259 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10260 { sh1_builtin_p,
10261 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
10264 static tree sh_builtin_get_fpscr;
10265 static tree sh_builtin_set_fpscr;
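/* For example, the two bdesc entries above decode via signature_args as
   follows: __builtin_sh_get_fpscr uses SH_BLTIN_UV = { 1, 0 }, i.e. an
   unsigned result and no arguments, which (with the SImode FPSCR operand)
   yields the prototype

     unsigned int __builtin_sh_get_fpscr (void);

   while __builtin_sh_set_fpscr uses SH_BLTIN_VU = { 0, 1 }, i.e. no result
   and one unsigned argument:

     void __builtin_sh_set_fpscr (unsigned int);  */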
10267 static void
10268 sh_init_builtins (void)
10270 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10271 memset (shared, 0, sizeof shared);
10273 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10275 builtin_description* d = &bdesc[di];
10277 if (!d->is_enabled ())
10278 continue;
10280 tree type, arg_type = NULL_TREE;
10281 int signature = d->signature;
10283 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10284 type = shared[signature];
10285 else
10287 int has_result = signature_args[signature][0] != 0;
10288 tree args[3];
10290 if (! TARGET_FPU_ANY
10291 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10292 continue;
10293 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10294 args[i] = NULL_TREE;
10295 for (int i = 3; ; i--)
10297 int arg = signature_args[signature][i];
10298 int opno = i - 1 + has_result;
10300 if (arg & 8)
10301 arg_type = ptr_type_node;
10302 else if (arg)
10303 arg_type = (*lang_hooks.types.type_for_mode)
10304 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10305 else if (i)
10306 continue;
10307 else
10308 arg_type = void_type_node;
10309 if (i == 0)
10310 break;
10311 args[i-1] = arg_type;
10313 type = build_function_type_list (arg_type, args[0], args[1],
10314 args[2], NULL_TREE);
10315 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10316 shared[signature] = type;
10318 d->fndecl =
10319 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10320 NULL, NULL_TREE);
10321 /* Record {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10322 if (d->icode == CODE_FOR_sts_fpscr)
10323 sh_builtin_get_fpscr = d->fndecl;
10324 else if (d->icode == CODE_FOR_set_fpscr)
10325 sh_builtin_set_fpscr = d->fndecl;
10329 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
10331 static void
10332 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
10334 const unsigned SH_FE_INVALID = 64;
10335 const unsigned SH_FE_DIVBYZERO = 32;
10336 const unsigned SH_FE_OVERFLOW = 16;
10337 const unsigned SH_FE_UNDERFLOW = 8;
10338 const unsigned SH_FE_INEXACT = 4;
10339 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10340 | SH_FE_DIVBYZERO
10341 | SH_FE_OVERFLOW
10342 | SH_FE_UNDERFLOW
10343 | SH_FE_INEXACT);
10344 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
10345 tree fenv_var, mask, ld_fenv, masked_fenv;
10346 tree new_fenv_var, reload_fenv, restore_fnenv;
10347 tree update_call, atomic_feraiseexcept, hold_fnclex;
10349 if (! TARGET_FPU_ANY)
10350 return;
10352 /* Generate the equivalent of :
10353 unsigned int fenv_var;
10354 fenv_var = __builtin_sh_get_fpscr ();
10356 unsigned int masked_fenv;
10357 masked_fenv = fenv_var & mask;
10359 __builtin_sh_set_fpscr (masked_fenv); */
10361 fenv_var = create_tmp_var_raw (unsigned_type_node);
10362 mask = build_int_cst (unsigned_type_node,
10363 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10364 | SH_FE_ALL_EXCEPT));
10365 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10366 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10367 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10368 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10369 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10370 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10371 ld_fenv),
10372 NULL_TREE, NULL_TREE);
10373 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10375 /* Store the value of masked_fenv to clear the exceptions:
10376 __builtin_sh_set_fpscr (masked_fenv); */
10378 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10380 /* Generate the equivalent of :
10381 unsigned int new_fenv_var;
10382 new_fenv_var = __builtin_sh_get_fpscr ();
10384 __builtin_sh_set_fpscr (fenv_var);
10386 __atomic_feraiseexcept (new_fenv_var); */
10388 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10389 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10390 build_call_expr (sh_builtin_get_fpscr, 0));
10391 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10392 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10393 update_call = build_call_expr (atomic_feraiseexcept, 1,
10394 fold_convert (integer_type_node,
10395 new_fenv_var));
10396 *update = build2 (COMPOUND_EXPR, void_type_node,
10397 build2 (COMPOUND_EXPR, void_type_node,
10398 reload_fenv, restore_fnenv), update_call);
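/* The generated sequence is roughly equivalent to the following
   (illustrative C; with SH_FE_EXCEPT_SHIFT == 5 the mask evaluates to
   ~((0x7c << 5) | 0x7c) == 0xfffff003):

     hold:    fenv_var = __builtin_sh_get_fpscr ();
              __builtin_sh_set_fpscr (fenv_var & 0xfffff003);
     clear:   __builtin_sh_set_fpscr (fenv_var & 0xfffff003);
     update:  new_fenv_var = __builtin_sh_get_fpscr ();
              __builtin_sh_set_fpscr (fenv_var);
              __atomic_feraiseexcept ((int) new_fenv_var);  */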
10401 /* Implements target hook vector_mode_supported_p. */
10402 bool
10403 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10405 return false;
10408 bool
10409 sh_frame_pointer_required (void)
10411 /* If needed override this in other tm.h files to cope with various OS
10412 lossage requiring a frame pointer. */
10413 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10414 return true;
10416 if (crtl->profile)
10417 return true;
10419 return false;
10422 /* Implements target hook dwarf_calling_convention. Return an enum
10423 of dwarf_calling_convention. */
10425 sh_dwarf_calling_convention (const_tree func)
10427 if (sh_attr_renesas_p (func))
10428 return DW_CC_GNU_renesas_sh;
10430 return DW_CC_normal;
10433 /* Returns the sh builtin decl for CODE. */
10434 static tree
10435 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10437 if (code >= ARRAY_SIZE (bdesc))
10438 return error_mark_node;
10440 if (!bdesc[code].is_enabled ())
10441 return error_mark_node;
10443 return bdesc[code].fndecl;
10446 /* Expand an expression EXP that calls a built-in function,
10447 with result going to TARGET if that's convenient
10448 (and in mode MODE if that's convenient).
10449 SUBTARGET may be used as the target for computing one of EXP's operands.
10450 IGNORE is nonzero if the value is to be ignored. */
10451 static rtx
10452 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10453 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10455 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10456 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10457 const struct builtin_description *d = &bdesc[fcode];
10458 enum insn_code icode = d->icode;
10459 int signature = d->signature;
10460 int nop = 0;
10461 rtx op[4];
10463 if (signature_args[signature][0])
10465 if (ignore)
10466 return NULL_RTX;
10468 machine_mode tmode = insn_data[icode].operand[0].mode;
10469 if (! target || GET_MODE (target) != tmode
10470 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10471 target = gen_reg_rtx (tmode);
10472 op[nop++] = target;
10474 else
10475 target = NULL_RTX;
10477 for (int i = 1; i <= 3; i++, nop++)
10479 if (! signature_args[signature][i])
10480 break;
10481 tree arg = CALL_EXPR_ARG (exp, i - 1);
10482 if (arg == error_mark_node)
10483 return const0_rtx;
10485 machine_mode opmode;
10486 tree optype;
10487 if (signature_args[signature][i] & 8)
10489 opmode = ptr_mode;
10490 optype = ptr_type_node;
10492 else
10494 opmode = insn_data[icode].operand[nop].mode;
10495 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10498 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10499 if (argmode != opmode)
10500 arg = build1 (NOP_EXPR, optype, arg);
10501 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10502 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10503 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10506 rtx pat = NULL_RTX;
10508 switch (nop)
10510 case 1:
10511 pat = (*insn_data[d->icode].genfun) (op[0]);
10512 break;
10513 case 2:
10514 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10515 break;
10516 case 3:
10517 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10518 break;
10519 case 4:
10520 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10521 break;
10522 default:
10523 gcc_unreachable ();
10525 if (! pat)
10526 return NULL_RTX;
10527 emit_insn (pat);
10528 return target;
10531 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10532 UNITS_PER_WORD bytes wide. */
10534 static unsigned int
10535 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10537 if (XD_REGISTER_P (regno))
10538 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10539 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
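/* E.g. with UNITS_PER_WORD == 4 (as on SH), a DFmode value of 8 bytes needs
   CEIL (8, 4) = 2 consecutive general or FP registers, but only
   CEIL (8, 8) = 1 XD register, since XD registers are counted in
   2 * UNITS_PER_WORD units here.  */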
10542 /* Implement TARGET_HARD_REGNO_MODE_OK.
10544 We can allow any mode in any general register. The special registers
10545 only allow SImode. Don't allow any mode in the PR.
10547 We cannot hold DCmode values in the XD registers because alter_reg
10548 handles subregs of them incorrectly. We could work around this by
10549 spacing the XD registers like the DR registers, but this would require
10550 additional memory in every compilation to hold larger register vectors.
10551 We could hold SFmode / SCmode values in XD registers, but that
10552 would require a tertiary reload when reloading from / to memory,
10553 and a secondary reload to reload from / to general regs; that
10554 seems to be a losing proposition.
10556 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10557 it won't be ferried through GP registers first. */
10558 static bool
10559 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10561 if (SPECIAL_REGISTER_P (regno))
10562 return mode == SImode;
10564 if (regno == FPUL_REG)
10565 return (mode == SImode || mode == SFmode);
10567 if (FP_REGISTER_P (regno) && mode == SFmode)
10568 return true;
10570 if (mode == V2SFmode)
10572 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10573 || GENERAL_REGISTER_P (regno)))
10574 return true;
10575 else
10576 return false;
10579 if (mode == V4SFmode)
10581 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10582 || GENERAL_REGISTER_P (regno))
10583 return true;
10584 else
10585 return false;
10588 if (mode == V16SFmode)
10589 return regno == FIRST_XD_REG;
10591 if (FP_REGISTER_P (regno))
10593 if (mode == SFmode
10594 || mode == SImode
10595 || ((TARGET_SH2E) && mode == SCmode)
10596 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10597 && ((regno - FIRST_FP_REG) & 1) == 0)
10598 || (TARGET_SH4 && mode == TImode
10599 && ((regno - FIRST_FP_REG) & 3) == 0))
10600 return true;
10601 else
10602 return false;
10605 if (XD_REGISTER_P (regno))
10606 return mode == DFmode;
10608 if (regno == PR_REG)
10609 return mode == SImode;
10611 if (regno == FPSCR_REG)
10612 return mode == SImode;
10614 return true;
10617 /* Implement TARGET_MODES_TIEABLE_P.
10619 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10620 and MODE2, for any hard reg, then this must be false for correct output.
10621 That's the case for xd registers: we don't hold SFmode values in
10622 them, so we can't tie an SFmode pseudo with one in another
10623 floating-point mode. */
10625 static bool
10626 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10628 return (mode1 == mode2
10629 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10630 && (mode1 != SFmode && mode2 != SFmode)));
10633 /* Specify the modes required to caller save a given hard regno.
10634 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10635 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10636 permits integer modes on them. That makes LRA's split process
10637 unhappy. See PR55212. */
10639 machine_mode
10640 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10641 machine_mode mode)
10643 if (FP_REGISTER_P (regno)
10644 && (mode == SFmode
10645 || mode == SCmode
10646 || ((mode == DFmode || mode == DCmode)
10647 && ((regno - FIRST_FP_REG) & 1) == 0)))
10648 return mode;
10650 return choose_hard_reg_mode (regno, nregs, false);
10653 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10654 static bool
10655 sh_can_change_mode_class (machine_mode from, machine_mode to,
10656 reg_class_t rclass)
10658 /* We want to enable the use of SUBREGs as a means to
10659 VEC_SELECT a single element of a vector. */
10661 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10662 This can be problematic when SFmode vector subregs need to be accessed
10663 on the stack with displacement addressing, as it happens with -O0.
10664 Thus we disallow the mode change for -O0. */
10665 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10666 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10668 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10670 if (TARGET_LITTLE_ENDIAN)
10672 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10673 return !reg_classes_intersect_p (DF_REGS, rclass);
10675 else
10677 if (GET_MODE_SIZE (from) < 8)
10678 return !reg_classes_intersect_p (DF_REGS, rclass);
10681 return true;
10684 /* Return true if registers in machine mode MODE will likely be
10685 allocated to registers in small register classes. */
10686 bool
10687 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10689 return true;
10692 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10693 that label is used. */
10694 void
10695 sh_mark_label (rtx address, int nuses)
10697 if (GOTOFF_P (address))
10699 /* Extract the label or symbol. */
10700 address = XEXP (address, 0);
10701 if (GET_CODE (address) == PLUS)
10702 address = XEXP (address, 0);
10703 address = XVECEXP (address, 0, 0);
10705 if (GET_CODE (address) == LABEL_REF
10706 && LABEL_P (XEXP (address, 0)))
10707 LABEL_NUSES (XEXP (address, 0)) += nuses;
10710 /* Compute extra cost of moving data between one register class
10711 and another.
10713 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10714 uses this information. Hence, the general register <-> floating point
10715 register information here is not used for SFmode. */
10716 static int
10717 sh_register_move_cost (machine_mode mode,
10718 reg_class_t srcclass, reg_class_t dstclass)
10720 if (dstclass == T_REGS || dstclass == PR_REGS)
10721 return 10;
10723 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10724 return 4;
10726 if (mode == SImode && TARGET_FMOVD
10727 && REGCLASS_HAS_FP_REG (srcclass)
10728 && REGCLASS_HAS_FP_REG (dstclass))
10729 return 4;
10731 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10732 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10734 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10735 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10736 return 9;
10738 if ((REGCLASS_HAS_FP_REG (dstclass)
10739 && REGCLASS_HAS_GENERAL_REG (srcclass))
10740 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10741 && REGCLASS_HAS_FP_REG (srcclass)))
10743 /* Discourage trying to use fp regs for a pointer. This also
10744 discourages fp regs with SImode because Pmode is an alias
10745 of SImode on this target. See PR target/48596. */
10746 int addend = (mode == Pmode) ? 40 : 0;
10748 return ((TARGET_FMOVD ? 8 : 12) + addend)
10749 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10752 if ((dstclass == FPUL_REGS
10753 && REGCLASS_HAS_GENERAL_REG (srcclass))
10754 || (srcclass == FPUL_REGS
10755 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10756 return 5;
10758 if ((dstclass == FPUL_REGS
10759 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10760 || (srcclass == FPUL_REGS
10761 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10762 return 7;
10764 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10765 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10766 return 4;
10768 if (TARGET_FMOVD
10769 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10770 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10771 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10773 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
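/* Two illustrative evaluations of the general <-> FP case above:
   moving an SImode pointer (mode == Pmode) costs (12 + 40) * 1 = 52
   without FMOVD, strongly discouraging FP registers for pointers,
   while a DFmode move between the same classes costs (8 + 0) * 1 = 8
   when FMOVD is enabled.  */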
10776 static rtx
10777 emit_load_ptr (rtx reg, rtx addr)
10779 rtx mem = gen_const_mem (ptr_mode, addr);
10781 if (Pmode != ptr_mode)
10782 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10783 return emit_move_insn (reg, mem);
10786 static void
10787 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10788 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10789 tree function)
10791 CUMULATIVE_ARGS cum;
10792 int structure_value_byref = 0;
10793 rtx this_rtx, this_value, sibcall, funexp;
10794 rtx_insn *insns;
10795 tree funtype = TREE_TYPE (function);
10796 int simple_add = CONST_OK_FOR_ADD (delta);
10797 int did_load = 0;
10798 rtx scratch0, scratch1, scratch2;
10800 reload_completed = 1;
10801 epilogue_completed = 1;
10802 crtl->uses_only_leaf_regs = 1;
10804 emit_note (NOTE_INSN_PROLOGUE_END);
10806 /* Find the "this" pointer. We have such a wide range of ABIs for the
10807 SH that it's best to do this completely machine independently.
10808 "this" is passed as first argument, unless a structure return pointer
10809 comes first, in which case "this" comes second. */
10810 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10811 #ifndef PCC_STATIC_STRUCT_RETURN
10812 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10813 structure_value_byref = 1;
10814 #endif /* not PCC_STATIC_STRUCT_RETURN */
10815 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10817 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10819 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
10821 this_rtx
10822 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
10824 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10825 static chain pointer (even if you can't have nested virtual functions
10826 right now, someone might implement them sometime), and the rest of the
10827 registers are used for argument passing, are callee-saved, or reserved. */
10828 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10829 -ffixed-reg has been used. */
10830 if (! call_used_regs[0] || fixed_regs[0])
10831 error ("r0 needs to be available as a call-clobbered register");
10832 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10835 if (call_used_regs[1] && ! fixed_regs[1])
10836 scratch1 = gen_rtx_REG (ptr_mode, 1);
10837 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10838 pointing where to return struct values. */
10839 if (call_used_regs[3] && ! fixed_regs[3])
10840 scratch2 = gen_rtx_REG (Pmode, 3);
10843 this_value = plus_constant (Pmode, this_rtx, delta);
10844 if (vcall_offset
10845 && (simple_add || scratch0 != scratch1)
10846 && strict_memory_address_p (ptr_mode, this_value))
10848 emit_load_ptr (scratch0, this_value);
10849 did_load = 1;
10852 if (!delta)
10853 ; /* Do nothing. */
10854 else if (simple_add)
10855 emit_move_insn (this_rtx, this_value);
10856 else
10858 emit_move_insn (scratch1, GEN_INT (delta));
10859 emit_insn (gen_add2_insn (this_rtx, scratch1));
10862 if (vcall_offset)
10864 rtx offset_addr;
10866 if (!did_load)
10867 emit_load_ptr (scratch0, this_rtx);
10869 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10870 if (strict_memory_address_p (ptr_mode, offset_addr))
10871 ; /* Do nothing. */
10872 else if (scratch0 != scratch1)
10874 /* scratch0 != scratch1, and we have indexed loads. Get better
10875 schedule by loading the offset into r1 and using an indexed
10876 load - then the load of r1 can issue before the load from
10877 (this_rtx + delta) finishes. */
10878 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10879 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10881 else if (CONST_OK_FOR_ADD (vcall_offset))
10883 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10884 offset_addr = scratch0;
10886 else if (scratch0 != scratch1)
10888 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10889 emit_insn (gen_add2_insn (scratch0, scratch1));
10890 offset_addr = scratch0;
10892 else
10893 gcc_unreachable (); /* FIXME */
10894 emit_load_ptr (scratch0, offset_addr);
10896 if (Pmode != ptr_mode)
10897 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10898 emit_insn (gen_add2_insn (this_rtx, scratch0));
10901 /* Generate a tail call to the target function. */
10902 if (! TREE_USED (function))
10904 assemble_external (function);
10905 TREE_USED (function) = 1;
10907 funexp = XEXP (DECL_RTL (function), 0);
10908 /* If the function is overridden, so is the thunk, hence we don't
10909 need GOT addressing even if this is a public symbol. */
10910 #if 0
10911 if (TARGET_SH1 && ! flag_weak)
10912 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10913 else
10914 #endif
10915 if (TARGET_SH2 && flag_pic)
10917 if (TARGET_FDPIC)
10919 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10920 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10922 else
10924 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10925 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10928 else
10930 emit_move_insn (scratch2, funexp);
10931 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10932 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10934 sibcall = emit_call_insn (sibcall);
10935 SIBLING_CALL_P (sibcall) = 1;
10936 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10937 emit_barrier ();
10939 /* Run just enough of rest_of_compilation to do scheduling and get
10940 the insns emitted. Note that use_thunk calls
10941 assemble_start_function and assemble_end_function. */
10943 insns = get_insns ();
10945 if (optimize > 0)
10947 if (! cfun->cfg)
10948 init_flow (cfun);
10949 split_all_insns_noflow ();
10952 sh_reorg ();
10953 shorten_branches (insns);
10954 final_start_function (insns, file, 1);
10955 final (insns, file, 1);
10956 final_end_function ();
10958 reload_completed = 0;
10959 epilogue_completed = 0;
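/* The emitted thunk is roughly equivalent to the following illustrative C
   (RETTYPE stands in for the real return type; the call itself is emitted
   as a sibcall):

     static RETTYPE
     thunk (char *this_ptr, ...)
     {
       this_ptr += delta;
       if (vcall_offset)
         this_ptr += *(ptrdiff_t *) (*(char **) this_ptr + vcall_offset);
       return function (this_ptr, ...);
     }  */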
10962 /* Return an RTX pair for the address and call site label of a function
10963 NAME of kind KIND, placing the result in TARGET if not NULL. For
10964 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10965 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10966 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10967 address of the function itself, not a function descriptor, so they
10968 can only be used with functions not using the FDPIC register that
10969 are known to be called directly without a PLT entry. */
10971 function_symbol_result
10972 function_symbol (rtx target, const char *name, sh_function_kind kind)
10974 /* If this is not an ordinary function, the name usually comes from a
10975 string literal or an sprintf buffer. Make sure we use the same
10976 string consistently, so that cse will be able to unify address loads. */
10977 if (kind != FUNCTION_ORDINARY)
10978 name = IDENTIFIER_POINTER (get_identifier (name));
10979 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
10980 rtx lab = const0_rtx;
10981 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10982 if (flag_pic)
10983 switch (kind)
10985 case FUNCTION_ORDINARY:
10986 break;
10987 case SFUNC_GOT:
10989 rtx reg = target ? target : gen_reg_rtx (Pmode);
10991 emit_insn (gen_symGOT2reg (reg, sym));
10992 sym = reg;
10993 break;
10995 case SFUNC_STATIC:
10997 rtx reg = target ? target : gen_reg_rtx (Pmode);
10999 if (TARGET_FDPIC)
11001 /* We use PC-relative calls, since GOTOFF can only refer
11002 to writable data. This works along with sh_sfunc_call. */
11003 lab = PATTERN (gen_call_site ());
11004 emit_insn (gen_sym_label2reg (reg, sym, lab));
11006 else
11008 /* ??? To allow cse to work, we use GOTOFF relocations.
11009 We could add combiner patterns to transform this into
11010 straight pc-relative calls with sym2PIC / bsrf when
11011 label load and function call are still 1:1 and in the
11012 same basic block during combine. */
11013 emit_insn (gen_symGOTOFF2reg (reg, sym));
11016 sym = reg;
11017 break;
11020 if (target && sym != target)
11022 emit_move_insn (target, sym);
11023 return function_symbol_result (target, lab);
11025 return function_symbol_result (sym, lab);
11028 /* Find the number of the first general purpose register in S that
11029 is not set. */
11030 static int
11031 scavenge_reg (HARD_REG_SET *s)
11033 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11034 if (TEST_HARD_REG_BIT (*s, r))
11035 return r;
11036 return -1;
11040 sh_get_pr_initial_val (void)
11042 /* If we haven't finished rtl generation, there might be a nonlocal label
11043 that we haven't seen yet.
11044 ??? get_hard_reg_initial_val fails if it is called after register
11045 allocation has started, unless it has been called before for the
11046 same register. And even then, we end up in trouble if we didn't use
11047 the register in the same basic block before. So call
11048 get_hard_reg_initial_val now and wrap it in an unspec if we might
11049 need to replace it. */
11050 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11051 combine can put the pseudo returned by get_hard_reg_initial_val into
11052 instructions that need a general purpose register, which will fail to
11053 be recognized when the pseudo becomes allocated to PR. */
11054 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
11055 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11058 bool
11059 sh_expand_t_scc (rtx operands[])
11061 enum rtx_code code = GET_CODE (operands[1]);
11062 rtx target = operands[0];
11063 rtx op0 = operands[2];
11064 rtx op1 = operands[3];
11065 rtx result = target;
11067 if (!REG_P (op0) || REGNO (op0) != T_REG
11068 || !CONST_INT_P (op1))
11069 return false;
11070 if (!REG_P (result))
11071 result = gen_reg_rtx (SImode);
11072 HOST_WIDE_INT val = INTVAL (op1);
11073 if ((code == EQ && val == 1) || (code == NE && val == 0))
11074 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11075 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11076 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11077 else if (code == EQ || code == NE)
11078 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11079 else
11080 return false;
11081 if (result != target)
11082 emit_move_insn (target, result);
11083 return true;
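/* For example,  r = (T != 0)  (code == NE, val == 0) becomes a single movt,
   and  r = (T == 0)  uses the negated form gen_movnegt.  An equality test
   of T against any other constant folds to a constant 0 or 1, while
   non-equality codes make the function return false so the caller falls
   back to the generic expansion.  */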
11086 /* INSN is an sfunc; return the rtx that describes the address used. */
11087 static rtx
11088 extract_sfunc_addr (rtx insn)
11090 rtx pattern = PATTERN (insn);
11091 const int len = XVECLEN (pattern, 0);
11092 for (int i = 0; i < len; i++)
11094 rtx part = XVECEXP (pattern, 0, i);
11095 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11096 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11097 return XEXP (part, 0);
11099 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11100 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11103 /* Verify that the register in use_sfunc_addr still agrees with the address
11104 used in the sfunc. This prevents fill_slots_from_thread from changing
11105 use_sfunc_addr.
11106 INSN is the use_sfunc_addr instruction, and REG is the register it
11107 guards. */
11108 bool
11109 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11111 /* Search for the sfunc. It should really come right after INSN. */
11112 while ((insn = NEXT_INSN (insn)))
11114 if (LABEL_P (insn) || JUMP_P (insn))
11115 break;
11116 if (! INSN_P (insn))
11117 continue;
11119 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11120 insn = seq->insn (0);
11121 if (GET_CODE (PATTERN (insn)) != PARALLEL
11122 || get_attr_type (insn) != TYPE_SFUNC)
11123 continue;
11124 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11126 gcc_unreachable ();
11129 /* This function returns a constant rtx that represents 2**15 / pi in
11130 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11131 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
11132 static GTY(()) rtx sh_fsca_sf2int_rtx;
11135 sh_fsca_sf2int (void)
11137 if (! sh_fsca_sf2int_rtx)
11139 REAL_VALUE_TYPE rv;
11141 real_from_string (&rv, "10430.378350470453");
11142 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11145 return sh_fsca_sf2int_rtx;
11148 /* This function returns a constant rtx that represents pi / 2**15 in
11149 SFmode. It's used to scale SFmode angles, in radians, to a
11150 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
11151 maps to 0x10000. */
11152 static GTY(()) rtx sh_fsca_int2sf_rtx;
11155 sh_fsca_int2sf (void)
11157 if (! sh_fsca_int2sf_rtx)
11159 REAL_VALUE_TYPE rv;
11161 real_from_string (&rv, "9.587379924285257e-5");
11162 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11165 return sh_fsca_int2sf_rtx;
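/* Editorial note (not part of the original sources): fsca takes its angle
   argument as a fixed-point fraction of a full circle where 0x10000
   corresponds to 2*pi, hence the two scale factors above:
	2^15 / pi  =  32768 / 3.14159...  ~=  10430.378350470453
	pi / 2^15  =  3.14159... / 32768  ~=  9.587379924285257e-5
   Multiplying by one factor and then the other is an identity up to
   rounding, so radians -> fsca units -> radians round-trips.  */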
11168 /* Initialize the CUMULATIVE_ARGS structure. */
11169 void
11170 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11171 tree fntype,
11172 rtx libname ATTRIBUTE_UNUSED,
11173 tree fndecl,
11174 signed int n_named_args,
11175 machine_mode mode)
11177 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11178 pcum->free_single_fp_reg = 0;
11179 pcum->outgoing = n_named_args != -1;
11181 /* FIXME: Should we check TARGET_HITACHI here ??? */
11182 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11184 if (fntype)
11186 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11187 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11188 pcum->prototype_p = prototype_p (fntype);
11189 pcum->arg_count [(int) SH_ARG_INT] = false;
11191 else
11193 pcum->arg_count [(int) SH_ARG_INT] = 0;
11194 pcum->prototype_p = false;
11195 if (mode != VOIDmode)
11197 /* If the default ABI is the Renesas ABI then all library
11198 calls must assume that the library will be using the
11199 Renesas ABI. So if the function would return its result
11200 in memory then we must force the address of this memory
11201 block onto the stack. Ideally we would like to call
11202 targetm.calls.return_in_memory() here but we do not have
11203 the TYPE or the FNDECL available so we synthesize the
11204 contents of that function as best we can. */
11205 pcum->force_mem =
11206 (TARGET_DEFAULT & MASK_HITACHI)
11207 && (mode == BLKmode
11208 || (GET_MODE_SIZE (mode) > 4
11209 && !(mode == DFmode
11210 && TARGET_FPU_DOUBLE)));
11212 else
11213 pcum->force_mem = false;
11218 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11220 enum rtx_code code = TRUNCATE;
11222 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11224 rtx inner = XEXP (x, 0);
11225 machine_mode inner_mode = GET_MODE (inner);
11227 if (inner_mode == mode)
11228 return inner;
11229 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11230 x = inner;
11231 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11232 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11234 code = GET_CODE (x);
11235 x = inner;
11238 return gen_rtx_fmt_e (code, mode, x);
11241 /* Load and store depend on the highpart of the address. However,
11242 set_attr_alternative does not give well-defined results before reload,
11243 so we must look at the rtl ourselves to see if any of the feeding
11244 registers is used in a memref.
11246 Return true iff INSN contains a MEM. */
11247 bool
11248 sh_contains_memref_p (rtx insn)
11250 subrtx_iterator::array_type array;
11251 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11252 if (MEM_P (*iter))
11253 return true;
11254 return false;
11257 /* Return true iff INSN loads a banked register. */
11258 bool
11259 sh_loads_bankedreg_p (rtx insn)
11261 if (GET_CODE (PATTERN (insn)) == SET)
11263 rtx op = SET_DEST (PATTERN(insn));
11264 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11265 return true;
11268 return false;
11271 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11272 static reg_class_t
11273 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11275 return rclass;
11278 /* Implement TARGET_SECONDARY_RELOAD. */
11279 static reg_class_t
11280 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11281 machine_mode mode, secondary_reload_info *sri)
11283 enum reg_class rclass = (enum reg_class) rclass_i;
11285 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11286 && REG_P (XEXP (XEXP (x, 0), 0))
11287 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11288 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11290 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11291 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11293 if (REG_P (x) && REGNO (x) == GBR_REG)
11294 return NO_REGS;
11296 if (in_p)
11298 if (REGCLASS_HAS_FP_REG (rclass)
11299 && immediate_operand ((x), mode)
11300 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11301 switch (mode)
11303 case E_SFmode:
11304 sri->icode = CODE_FOR_reload_insf__frn;
11305 return NO_REGS;
11306 case E_DFmode:
11307 sri->icode = CODE_FOR_reload_indf__frn;
11308 return NO_REGS;
11309 case E_SImode:
11310 /* ??? If we knew that we are in the appropriate mode -
11311 single precision - we could use a reload pattern directly. */
11312 return FPUL_REGS;
11313 default:
11314 abort ();
11316 if (rclass == FPUL_REGS
11317 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11318 || REGNO (x) == T_REG))
11319 || GET_CODE (x) == PLUS))
11320 return GENERAL_REGS;
11321 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11323 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11324 return GENERAL_REGS;
11325 else if (mode == SFmode)
11326 return FP_REGS;
11327 sri->icode = CODE_FOR_reload_insi__i_fpul;
11328 return NO_REGS;
11330 if (rclass == FPSCR_REGS
11331 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11332 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11333 return GENERAL_REGS;
11334 } /* end of input-only processing. */
11336 if (((REGCLASS_HAS_FP_REG (rclass)
11337 && (REG_P (x)
11338 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11339 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11340 && TARGET_FMOVD))))
11341 || (REGCLASS_HAS_GENERAL_REG (rclass)
11342 && REG_P (x)
11343 && FP_REGISTER_P (REGNO (x))))
11344 && (mode == SFmode || mode == SImode))
11345 return FPUL_REGS;
11346 if ((rclass == FPUL_REGS
11347 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11348 && (MEM_P (x)
11349 || (REG_P (x)
11350 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11351 || REGNO (x) == T_REG
11352 || system_reg_operand (x, VOIDmode)))))
11354 if (rclass == FPUL_REGS)
11355 return GENERAL_REGS;
11356 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
11359 if ((rclass == MAC_REGS || rclass == PR_REGS)
11360 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11361 && rclass != REGNO_REG_CLASS (REGNO (x)))
11362 return GENERAL_REGS;
11364 /* If we get here, fall back to loading the FPUL register through general registers.
11365 This case can happen when movsi_ie insn is picked initially to
11366 load/store the FPUL register from/to another register, and then the
11367 other register is allocated on the stack. */
11368 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11369 return GENERAL_REGS;
11371 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11372 the other operand.
11373 On SH2A we could also just leave it alone here, which would result in a
11374 4 byte move insn being generated instead. However, for this to work
11375 the insns must have the appropriate alternatives. */
11376 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11377 && satisfies_constraint_Sdd (x)
11378 && sh_disp_addr_displacement (x)
11379 <= sh_max_mov_insn_displacement (mode, false))
11380 return R0_REGS;
11382 /* When reload is trying to address a QImode or HImode subreg on the stack,
11383 force any subreg byte into R0_REGS, as this is going to become a
11384 displacement address.
11385 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11386 is on the stack, the memref to it might already require a displacement
11387 and that has to be added to the final address. At this point we don't
11388 know the cumulative displacement so we assume the worst case. */
11389 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11390 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11391 return R0_REGS;
11393 return NO_REGS;
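/* Editorial illustration (not part of the original sources): the GBR
   cases at the top of sh_secondary_reload exist because SH only has
   GBR-relative addressing with R0 as the data operand, e.g.
	mov.l	@(disp,gbr),r0
	mov.l	r0,@(disp,gbr)
   so anything else that wants to go through a GBR address needs R0 as an
   intermediate, unless the requested reload class already is R0_REGS.  */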
11396 /* Return true if SUBST can't safely replace its equivalent during RA. */
11397 static bool
11398 sh_cannot_substitute_mem_equiv_p (rtx)
11400 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11401 uses R0 and may cause spill failure when R0 is already used.
11402 We have to return true for that case at least.
11403 Moreover, SH relies heavily on R0 and does not have enough hard
11404 registers to make the equiv substitution win in size or speed on
11405 average working sets. The pseudos produced to hold the equiv values
11406 can't get good hard registers in the bad cases and end up as memory
11407 save/restore insns, which makes the code worse. */
11408 return true;
11411 /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
11412 static bool
11413 sh_legitimize_address_displacement (rtx *offset1, rtx *offset2,
11414 poly_int64 orig_offset,
11415 machine_mode mode)
11417 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11418 || (TARGET_SH2E && mode == SFmode))
11419 return false;
11421 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset);
11422 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11424 *offset1 = adj.offset_adjust;
11425 *offset2 = adj.mov_disp;
11426 return true;
11429 return false;
11432 /* Return true if the movsf insn should be split with an additional
11433 register. */
11434 bool
11435 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11437 /* op0 == op1 */
11438 if (rtx_equal_p (op0, op1))
11439 return true;
11440 /* fy, FQ, reg */
11441 if (GET_CODE (op1) == CONST_DOUBLE
11442 && ! satisfies_constraint_G (op1)
11443 && ! satisfies_constraint_H (op1)
11444 && REG_P (op0)
11445 && REG_P (op2))
11446 return true;
11447 /* f, r, y */
11448 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11449 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11450 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11451 return true;
11452 /* r, f, y */
11453 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11454 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11455 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11456 return true;
11458 return false;
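/* Editorial note (not part of the original sources): the alternatives
   checked above correspond to SFmode moves between the general and the
   floating point register files, which on SH have to be staged through
   FPUL, e.g. (sketch)
	lds	r1,fpul
	fsts	fpul,fr4
   When register allocation picks such an alternative, the insn is split
   so that the FPUL temporary becomes explicit.  */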
11461 static void
11462 sh_conditional_register_usage (void)
11464 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11465 if (! VALID_REGISTER_P (regno))
11466 fixed_regs[regno] = call_used_regs[regno] = 1;
11467 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11468 if (flag_pic)
11470 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11471 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11473 if (TARGET_FDPIC)
11475 fixed_regs[PIC_REG] = 1;
11476 call_used_regs[PIC_REG] = 1;
11477 call_really_used_regs[PIC_REG] = 1;
11479 /* Renesas saves and restores mac registers on call. */
11480 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11482 call_really_used_regs[MACH_REG] = 0;
11483 call_really_used_regs[MACL_REG] = 0;
11486 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11487 if (! fixed_regs[regno] && call_really_used_regs[regno])
11488 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11490 call_really_used_regs[FPSCR_MODES_REG] = 0;
11491 call_really_used_regs[FPSCR_STAT_REG] = 0;
11494 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11496 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11497 static bool
11498 sh_legitimate_constant_p (machine_mode mode, rtx x)
11500 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11502 rtx base, offset;
11503 split_const (x, &base, &offset);
11505 if (GET_CODE (base) == SYMBOL_REF
11506 && !offset_within_block_p (base, INTVAL (offset)))
11507 return false;
11510 if (TARGET_FDPIC
11511 && (SYMBOLIC_CONST_P (x)
11512 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11513 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11514 return false;
11516 return GET_CODE (x) != CONST_DOUBLE
11517 || mode == DFmode || mode == SFmode
11518 || mode == DImode || GET_MODE (x) == VOIDmode;
11521 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11523 static void
11524 sh_init_sync_libfuncs (void)
11526 init_sync_libfuncs (UNITS_PER_WORD);
11529 /* Return true if it is appropriate to emit `ret' instructions in the
11530 body of a function. */
11531 bool
11532 sh_can_use_simple_return_p (void)
11534 if (! reload_completed || frame_pointer_needed)
11535 return false;
11537 /* Moving the prologue around doesn't reduce the size. */
11538 if (optimize_function_for_size_p (cfun))
11539 return false;
11541 /* Finally, allow for pr save. */
11542 HARD_REG_SET live_regs_mask;
11543 int d = calc_live_regs (&live_regs_mask);
11545 if (rounded_frame_size (d) > 4)
11546 return false;
11548 return true;
11551 /*------------------------------------------------------------------------------
11552 Address mode optimization support code
11555 typedef HOST_WIDE_INT disp_t;
11556 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11557 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11558 static const disp_t INVALID_DISP = MAX_DISP;
11560 /* A memory reference which is described by a base register and a
11561 displacement. */
11562 class base_reg_disp
11564 public:
11565 base_reg_disp (rtx br, disp_t d);
11567 bool is_reg (void) const;
11568 bool is_disp (void) const;
11569 rtx reg (void) const;
11570 disp_t disp (void) const;
11572 private:
11573 rtx reg_;
11574 disp_t disp_;
11577 inline
11578 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11579 : reg_ (br), disp_ (d)
11583 inline bool
11584 base_reg_disp::is_reg (void) const
11586 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11589 inline bool
11590 base_reg_disp::is_disp (void) const
11592 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11595 inline rtx
11596 base_reg_disp::reg (void) const
11598 return reg_;
11601 inline disp_t
11602 base_reg_disp::disp (void) const
11604 return disp_;
11607 /* Find the base register and calculate the displacement for a given
11608 address rtx 'x'. */
11609 static base_reg_disp
11610 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11611 rtx base_reg = NULL)
11613 if (REG_P (x))
11615 if (REGNO (x) == GBR_REG)
11616 return base_reg_disp (x, disp);
11618 /* We've reached a hard-reg. This is probably the point where
11619 function args are copied to pseudos. Do not go any further and
11620 stick to the pseudo. If the original mem addr was in a hard reg
11621 from the beginning, it will become the base reg. */
11622 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11623 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11625 /* Find the def of the reg and trace it. If there is more than one
11626 def and they are not all the same, assume it's not safe to proceed. */
11627 rtx_insn* last_i = NULL;
11628 rtx last_set = NULL;
11629 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11630 d = DF_REF_NEXT_REG (d))
11632 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11634 /* Accept multiple defs, as long as they are equal. */
11635 if (last_set == NULL || rtx_equal_p (last_set, set))
11637 last_i = DF_REF_INSN (d);
11638 last_set = set;
11640 else
11642 last_i = NULL;
11643 last_set = NULL;
11644 break;
11648 if (last_set != NULL && last_i != NULL)
11649 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11650 XEXP (last_set, 0));
11652 /* If we get here, no previous insn was found that sets the reg.
11653 The input reg is already the base reg. */
11654 return base_reg_disp (x, disp);
11657 else if (GET_CODE (x) == PLUS)
11659 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11660 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11662 /* Either left or right val must be a reg.
11663 We don't handle the case of 'reg + reg' here. */
11664 if (left_val.is_reg () && right_val.is_disp ())
11665 return base_reg_disp (left_val.reg (), left_val.disp ()
11666 + right_val.disp () + disp);
11667 else if (right_val.is_reg () && left_val.is_disp ())
11668 return base_reg_disp (right_val.reg (), right_val.disp ()
11669 + left_val.disp () + disp);
11670 else
11671 return base_reg_disp (base_reg, disp);
11674 else if (CONST_INT_P (x))
11675 return base_reg_disp (NULL, disp + INTVAL (x));
11677 /* Didn't find anything useful. */
11678 return base_reg_disp (base_reg, disp);
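/* Editorial example (not part of the original sources): for an address
   like (plus (reg 170) (const_int 4)) where the def chain shows
	(set (reg 170) (plus (reg:SI GBR_REG) (const_int 12)))
   the recursion above follows the def of reg 170 and folds the constants,
   yielding base reg GBR and displacement 16.  */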
11681 /* Given an insn and a memory operand, try to find an equivalent GBR
11682 based memory address and return the corresponding new memory address.
11683 Return NULL_RTX if not found. */
11685 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11687 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11688 return NULL_RTX;
11690 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11691 if (side_effects_p (XEXP (mem, 0)))
11692 return NULL_RTX;
11694 /* When not optimizing there might be no dataflow available. */
11695 if (df == NULL)
11696 return NULL_RTX;
11698 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11700 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11702 /* If GBR is marked as call clobbered we bail out if we see a call.
11703 FIXME: Actually should check if this mem refers to the gbr value
11704 before or after the call. If there is a store_gbr preceding this
11705 mem, it's safe to use GBR for this mem.
11707 If GBR is not marked as call clobbered, but there is some other
11708 def than a call, it's probably a load_gbr upon which we also
11709 bail out to be on the safe side.
11710 FIXME: Should check if we have a use-after-def case, such as
11711 the call case above. */
11712 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11713 d = DF_REF_NEXT_REG (d))
11715 if (CALL_P (DF_REF_INSN (d)))
11717 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
11718 return NULL_RTX;
11719 else
11720 continue;
11722 else
11723 return NULL_RTX;
11726 rtx disp = GEN_INT (gbr_disp.disp ());
11727 if (gbr_displacement (disp, GET_MODE (mem)))
11728 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11731 return NULL_RTX;
11734 /*------------------------------------------------------------------------------
11735 Manual insn combine support code.
11738 /* Return true if the specified insn contains any UNSPECs or
11739 UNSPEC_VOLATILEs. */
11740 static bool
11741 sh_unspec_insn_p (rtx x)
11743 subrtx_iterator::array_type array;
11744 FOR_EACH_SUBRTX (i, array, x, ALL)
11745 if (*i != NULL
11746 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11747 return true;
11749 return false;
11752 /* Return true if the register operands of the specified insn are modified
11753 between the specified from and to insns (exclusive of those two). */
11754 bool
11755 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11756 const rtx_insn* from,
11757 const rtx_insn* to)
11759 /* FIXME: Return true for multiple sets for now. */
11760 rtx s = single_set (operands_insn);
11761 if (s == NULL_RTX)
11762 return true;
11764 subrtx_iterator::array_type array;
11765 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11766 if (*i != NULL &&
11767 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11768 return true;
11770 return false;
11773 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11774 negates the T bit and stores the result in the T bit. */
11775 bool
11776 sh_is_nott_insn (const rtx_insn* i)
11778 return i != NULL && GET_CODE (PATTERN (i)) == SET
11779 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11780 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11784 sh_movt_set_dest (const rtx_insn* i)
11786 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11790 sh_movt_set_dest (const_rtx pat)
11792 return GET_CODE (pat) == SET
11793 && arith_reg_dest (XEXP (pat, 0), SImode)
11794 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11797 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11798 that stores the negated T bit in a register, and return the destination
11799 register rtx, or null. */
11801 sh_movrt_set_dest (const rtx_insn* i)
11803 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11807 sh_movrt_set_dest (const_rtx pat)
11809 /* The negc movrt replacement is inside a parallel. */
11810 if (GET_CODE (pat) == PARALLEL)
11811 pat = XVECEXP (pat, 0, 0);
11813 return GET_CODE (pat) == SET
11814 && arith_reg_dest (XEXP (pat, 0), SImode)
11815 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11819 /* Given an insn and a reg number, tell whether the reg dies or is unused
11820 after the insn. */
11821 bool
11822 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11824 return find_regno_note (i, REG_DEAD, regno) != NULL
11825 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11828 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11829 mark it as being used after the insn. */
11830 void
11831 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11833 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11834 remove_note (i, n);
11835 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11836 remove_note (i, n);
11839 /* Given an insn check if it contains any post/pre inc/dec mem operands and
11840 add the REG_INC notes accordingly.
11841 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
11842 FIXME: This function is currently used by peephole2 patterns because
11843 the peephole2 pass does not preserve REG_INC notes. If the notes
11844 are dropped the following passes will do wrong things. */
11845 rtx_insn*
11846 sh_check_add_incdec_notes (rtx_insn* i)
11848 struct for_each_inc_dec_clb
11850 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11851 rtx dest, rtx src ATTRIBUTE_UNUSED,
11852 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11854 gcc_assert (REG_P (dest));
11856 rtx_insn* i = (rtx_insn*)arg;
11857 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11858 add_reg_note (i, REG_INC, dest);
11860 return 0;
11864 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11865 return i;
11868 /* Given a move insn destination and a source, make sure that the move source
11869 operand is not a post-inc mem load with the same address reg as the
11870 destination. Returns the modified source operand with the post-inc removed
11871 if necessary. */
11873 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11875 if (!MEM_P (src))
11876 return src;
11878 rtx addr = XEXP (src, 0);
11880 if (GET_CODE (addr) == POST_INC
11881 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11882 return replace_equiv_address (src, XEXP (addr, 0));
11884 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11885 return src;
11888 /* Emit a move insn that is safe to be used in peephole patterns. */
11889 rtx_insn*
11890 sh_peephole_emit_move_insn (rtx dst, rtx src)
11892 return sh_check_add_incdec_notes (
11893 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11896 /* Given an op rtx and an insn, try to find out whether the result of the
11897 specified op consists only of logical operations on T bit stores. */
11898 bool
11899 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11901 if (!logical_operator (op, SImode))
11902 return false;
11904 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11905 int op_is_t_count = 0;
11907 for (int i = 0; i < 2; ++i)
11909 if (t_reg_operand (ops[i], VOIDmode)
11910 || negt_reg_operand (ops[i], VOIDmode))
11911 op_is_t_count++;
11913 else
11915 set_of_reg op_set = sh_find_set_of_reg
11916 (ops[i], insn, prev_nonnote_nondebug_insn_bb);
11917 if (op_set.set_src == NULL_RTX)
11918 continue;
11920 if (t_reg_operand (op_set.set_src, VOIDmode)
11921 || negt_reg_operand (op_set.set_src, VOIDmode)
11922 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11923 op_is_t_count++;
11927 return op_is_t_count == 2;
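/* Editorial example (not part of the original sources): for a chain such
   as
	(set (reg 171) (reg:SI T_REG))
	(set (reg 172) (xor:SI (reg 171) (reg:SI T_REG)))
   sh_is_logical_t_store_expr returns true for the xor, because both
   operands trace back to T bit stores and the result is therefore known
   to be either 0 or 1.  */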
11930 /* Given the operand that is extended in a sign/zero extend insn, and the
11931 insn, try to figure out whether the sign/zero extension can be replaced
11932 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11933 NULL_RTX otherwise. */
11935 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11937 if (REG_P (extended_op))
11938 extended_op = extended_op;
11939 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11940 extended_op = SUBREG_REG (extended_op);
11941 else
11942 return NULL_RTX;
11944 /* Reg moves must be of the same mode. */
11945 if (GET_MODE (extended_op) != SImode)
11946 return NULL_RTX;
11948 set_of_reg s = sh_find_set_of_reg (extended_op, insn,
11949 prev_nonnote_nondebug_insn_bb);
11950 if (s.set_src == NULL_RTX)
11951 return NULL_RTX;
11953 if (t_reg_operand (s.set_src, VOIDmode)
11954 || negt_reg_operand (s.set_src, VOIDmode))
11955 return extended_op;
11957 /* If the zero extended reg was formed by a logical operation, check the
11958 operands of the logical operation. If both originated from T bit
11959 stores the zero extension can be eliminated. */
11960 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11961 return extended_op;
11963 return NULL_RTX;
11966 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11967 figure out whether it should be converted into a movt-xor sequence in
11968 the movrt_negc splitter.
11969 Returns true if insns have been modified and the splitter has succeeded. */
11970 bool
11971 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11973 /* In cases such as
11974 tst r4,r4
11975 mov #-1,r1
11976 negc r1,r1
11977 tst r4,r4
11978 we can replace the T bit clobbering negc with a movt-xor sequence and
11979 eliminate the redundant comparison.
11980 Because the xor insn depends on register allocation results, allow this
11981 only before reload. */
11982 if (!can_create_pseudo_p ())
11983 return false;
11985 set_of_reg t_before_negc = sh_find_set_of_reg
11986 (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb);
11987 set_of_reg t_after_negc = sh_find_set_of_reg
11988 (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb);
11990 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
11991 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
11992 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11993 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
11994 t_before_negc.insn,
11995 t_after_negc.insn)
11996 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11997 && !sh_unspec_insn_p (t_after_negc.insn)
11998 && !volatile_insn_p (PATTERN (t_after_negc.insn))
11999 && !side_effects_p (PATTERN (t_after_negc.insn))
12000 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
12002 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
12003 set_insn_deleted (t_after_negc.insn);
12004 return true;
12006 else
12007 return false;
12010 /* Given a reg and the current insn, see if the value of the reg originated
12011 from a sign or zero extension and return the discovered information. */
12012 sh_extending_set_of_reg
12013 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
12015 if (reg == NULL)
12016 return sh_extending_set_of_reg (curr_insn);
12018 if (SUBREG_P (reg))
12019 reg = SUBREG_REG (reg);
12021 if (!REG_P (reg))
12022 return sh_extending_set_of_reg (curr_insn);
12024 /* FIXME: Also search the predecessor basic blocks. It seems that checking
12025 only the adjacent predecessor blocks would cover most of the cases.
12026 Also try to look through the first extension that we hit. There are some
12027 cases where a zero_extend is followed by an (implicit) sign_extend, and it
12028 fails to see the sign_extend. */
12029 sh_extending_set_of_reg result = sh_find_set_of_reg
12030 (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true);
12032 if (result.set_src != NULL)
12034 if (GET_CODE (result.set_src) == SIGN_EXTEND
12035 || GET_CODE (result.set_src) == ZERO_EXTEND)
12037 if (dump_file)
12038 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12039 "explicitly sign/zero extended in insn %d\n",
12040 REGNO (reg), INSN_UID (result.insn));
12041 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
12042 result.ext_code = GET_CODE (result.set_src);
12044 else if (MEM_P (result.set_src)
12045 && (GET_MODE (result.set_src) == QImode
12046 || GET_MODE (result.set_src) == HImode)
12047 && !sh_unspec_insn_p (result.insn))
12049 /* On SH QIHImode memory loads always sign extend. However, in
12050 some cases where it seems that the higher bits are not
12051 interesting, the loads will not be expanded as sign extending
12052 insns, but as QIHImode loads into QIHImode regs. We report that
12053 the reg has been sign extended by the mem load. When it is used
12054 as such, we must convert the mem load into a sign extending insn,
12055 see also sh_extending_set_of_reg::use_as_extended_reg. */
12056 if (dump_file)
12057 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12058 "implicitly sign extended in insn %d\n",
12059 REGNO (reg), INSN_UID (result.insn));
12060 result.from_mode = GET_MODE (result.set_src);
12061 result.ext_code = SIGN_EXTEND;
12065 return result;
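/* Editorial illustration (not part of the original sources): a QImode
   load such as
	mov.b	@r4,r1
   always sign-extends the byte into the full register on SH, so even
   when the RTL only records a QImode reg set, the code above may report
   the reg as implicitly sign extended; use_as_extended_reg below then
   rewrites the load into an explicit extendqisi2 when the SImode value
   is needed.  */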
12068 /* Given a reg that is known to be sign or zero extended at some insn,
12069 take the appropriate measures so that the extended value can be used as
12070 a reg at the specified insn and return the resulting reg rtx. */
12072 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
12074 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
12075 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
12076 gcc_assert (from_mode == QImode || from_mode == HImode);
12078 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
12080 if (dump_file)
12081 fprintf (dump_file,
12082 "use_as_extended_reg: converting non-extending mem load in "
12083 "insn %d into sign-extending load\n", INSN_UID (insn));
12085 rtx r = gen_reg_rtx (SImode);
12086 rtx_insn* i0;
12087 if (from_mode == QImode)
12088 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
12089 else if (from_mode == HImode)
12090 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
12091 else
12092 gcc_unreachable ();
12094 emit_insn_after (
12095 gen_move_insn (XEXP (set_rtx, 0),
12096 gen_lowpart (GET_MODE (set_src), r)), i0);
12097 set_insn_deleted (insn);
12098 return r;
12100 else
12102 rtx extension_dst = XEXP (set_rtx, 0);
12103 if (GET_MODE (extension_dst) != SImode)
12104 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12105 GET_MODE (extension_dst), 0);
12106 if (modified_between_p (extension_dst, insn, use_at_insn))
12108 if (dump_file)
12109 fprintf (dump_file,
12110 "use_as_extended_reg: dest reg %d of extending insn %d is "
12111 "modified, inserting a reg-reg copy\n",
12112 REGNO (extension_dst), INSN_UID (insn));
12114 rtx r = gen_reg_rtx (SImode);
12115 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12116 return r;
12118 else
12120 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12121 return extension_dst;
12126 bool
12127 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12129 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12130 && (from_mode == QImode || from_mode == HImode)
12131 && set_src != NULL)
12132 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12133 else
12134 return false;
12138 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12140 gcc_assert (can_use_as_unextended_reg ());
12142 rtx r = XEXP (set_src, 0);
12143 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12145 if (modified_between_p (r, insn, use_at_insn))
12147 rtx r1 = gen_reg_rtx (SImode);
12148 emit_insn_after (gen_move_insn (r1, r0), insn);
12149 return r1;
12151 else
12153 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12154 ? REGNO (SUBREG_REG (r))
12155 : REGNO (r));
12156 return r0;
12160 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12161 perform the necessary checks on the operands and split it accordingly. */
12162 void
12163 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12164 int subreg_offset, rtx operands[])
12166 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12168 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12169 curr_insn);
12170 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12171 curr_insn);
12173 /* If one of the operands is known to be zero extended, that's already
12174 sufficient to mask out the unwanted high bits. */
12175 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12177 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12178 operands[1]));
12179 return;
12181 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12183 emit_insn (gen_tstsi_t (operands[0],
12184 eop1.use_as_extended_reg (curr_insn)));
12185 return;
12188 /* None of the operands seem to be zero extended. If both are sign
12189 extended it's OK, too: the upper bits of each operand are then copies of its sign bit, so the SImode AND is zero exactly when the narrow AND is zero. */
12190 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12191 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12193 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12194 eop1.use_as_extended_reg (curr_insn)));
12195 return;
12198 /* Otherwise we have to insert a zero extension on one of the operands to
12199 mask out the unwanted high bits.
12200 Prefer the operand that has no known extension. */
12201 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12202 std::swap (operands[0], operands[1]);
12204 rtx tmp0 = gen_reg_rtx (SImode);
12205 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12206 GET_MODE (operands[0]), subreg_offset);
12207 emit_insn (subreg_mode == QImode
12208 ? gen_zero_extendqisi2 (tmp0, tmp1)
12209 : gen_zero_extendhisi2 (tmp0, tmp1));
12210 emit_insn (gen_tstsi_t (tmp0, operands[1]));
12213 /* A helper class to increment/decrement a counter variable each time a
12214 function is entered/left. */
12215 class scope_counter
12217 public:
12218 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12220 ~scope_counter (void)
12222 --m_counter;
12223 gcc_assert (m_counter >= 0);
12226 int count (void) const { return m_counter; }
12228 private:
12229 int& m_counter;
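/* Editorial usage sketch (not part of the original sources; the counter
   name is made up):
	static int my_reent_count = 0;
	...
	scope_counter recursion (my_reent_count);
	if (recursion.count () > 1)
	  return false;
   The destructor decrements the counter on every exit path, which is how
   sh_recog_treg_set_expr below limits its recursion depth.  */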
12232 /* Given an rtx x, determine whether the expression can be used to create
12233 an insn that calculates x and stores the result in the T bit.
12234 This is used by the 'treg_set_expr' predicate to construct insns sequences
12235 where T bit results are fed into other insns, such as addc, subc, negc
12236 insns.
12238 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12239 distinguish between 'positive' and 'negative' forms. For now this has to
12240 be done in the preparation code. We could also introduce
12241 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12242 two different patterns for the 'positive' and 'negative' forms. However,
12243 the total number of lines of code seems to be about the same and the
12244 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12245 recog function would need to look inside the expression by temporarily
12246 splitting it. */
12247 static int sh_recog_treg_set_expr_reent_count = 0;
12249 bool
12250 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12252 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12254 /* Limit the recursion count to avoid nested expressions which we can't
12255 resolve to a single treg set insn. */
12256 if (recursion.count () > 1)
12257 return false;
12259 /* Early accept known possible operands before doing recog. */
12260 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12261 || negt_reg_operand (op, mode))
12262 return true;
12264 /* Early reject impossible operands before doing recog.
12265 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12266 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12267 such as lower-subreg will bail out. Some insns such as SH4A movua are
12268 done with UNSPEC, so we must reject those, too, or else it would result
12269 in an invalid reg -> treg move. */
12270 if (CONST_INT_P (op) || register_operand (op, mode)
12271 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12272 return false;
12274 if (!can_create_pseudo_p ())
12275 return false;
12277 /* expand_debug_locations may call this to compute rtx costs at
12278 very early stage. In that case, don't make new insns here to
12279 avoid codegen differences with -g. */
12280 if (currently_expanding_to_rtl)
12281 return false;
12283 /* We are going to invoke recog in a re-entrant way and thus
12284 have to capture its current state and restore it afterwards. */
12285 recog_data_d prev_recog_data = recog_data;
12287 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12288 SET_PREV_INSN (i) = NULL;
12289 SET_NEXT_INSN (i) = NULL;
12291 /* If the comparison op doesn't have a result mode, set it to SImode. */
12292 machine_mode prev_op_mode = GET_MODE (op);
12293 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12294 PUT_MODE (op, SImode);
12296 int result = recog (PATTERN (i), i, 0);
12298 /* It seems there is no insn like that. Create a negated version and
12299 try again. If we hit a negated form, we'll allow that and append a
12300 nott sequence when splitting out the insns. Insns that do the split
12301 can then remove the trailing nott if they know how to deal with it. */
12302 if (result < 0 && COMPARISON_P (op))
12304 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12305 if (cmp_mode == VOIDmode)
12306 cmp_mode = GET_MODE (XEXP (op, 1));
12308 rtx_code prev_code = GET_CODE (op);
12309 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12310 result = recog (PATTERN (i), i, 0);
12311 PUT_CODE (op, prev_code);
12314 PUT_MODE (op, prev_op_mode);
12315 recog_data = prev_recog_data;
12316 return result >= 0;
12319 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12320 This can be used as a condition for insn/split patterns to allow certain
12321 T bit setting patterns only to be matched as sub expressions of other
12322 patterns. */
12323 bool
12324 sh_in_recog_treg_set_expr (void)
12326 return sh_recog_treg_set_expr_reent_count > 0;
12329 /* Given an rtx x, which is assumed to be some expression that has been
12330 matched by the 'treg_set_expr' predicate before, split and emit the
12331 insns that are necessary to calculate the expression and store the result
12332 in the T bit.
12333 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
12334 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12335 'delete_insn' which then causes the DF parts to bail out, because we
12336 currently are inside another gen_split* function and would invoke
12337 'try_split' in a reentrant way. */
12338 static std::pair<rtx_insn*, rtx_insn*>
12339 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12341 if (dump_file)
12343 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12344 print_rtl_single (dump_file, i);
12345 fprintf (dump_file, "\n");
12348 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12350 if (seq == NULL)
12351 return std::make_pair (i, i);
12353 /* Avoid infinite splitter loops if any insn of the result matches
12354 the original pattern. */
12355 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12356 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12357 return std::make_pair (i, i);
12359 unshare_all_rtl_in_chain (seq);
12361 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12362 a linked list, replace the single insn with the new insns. */
12363 rtx_insn* seqlast = seq;
12364 while (NEXT_INSN (seqlast) != NULL)
12365 seqlast = NEXT_INSN (seqlast);
12367 if (rtx_insn* iprev = PREV_INSN (i))
12368 SET_NEXT_INSN (iprev) = seq;
12369 if (rtx_insn* inext = NEXT_INSN (i))
12370 SET_PREV_INSN (inext) = seqlast;
12372 SET_PREV_INSN (seq) = PREV_INSN (i);
12373 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12375 SET_PREV_INSN (i) = NULL;
12376 SET_NEXT_INSN (i) = NULL;
12378 /* Recursively split all insns. */
12379 for (i = seq; ; i = NEXT_INSN (i))
12381 std::pair<rtx_insn*, rtx_insn*> ii =
12382 sh_try_split_insn_simple (i, curr_insn, n + 1);
12383 if (i == seq)
12384 seq = ii.first;
12385 if (i == seqlast)
12387 seqlast = ii.second;
12388 break;
12390 i = ii.first;
12393 return std::make_pair (seq, seqlast);
12396 sh_treg_insns
12397 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12399 if (t_reg_operand (x, VOIDmode))
12400 return sh_treg_insns ();
12402 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12404 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12405 SET_PREV_INSN (i) = NULL;
12406 SET_NEXT_INSN (i) = NULL;
12408 if (dump_file)
12410 fprintf (dump_file, "split_treg_set_expr insn:\n");
12411 print_rtl (dump_file, i);
12412 fprintf (dump_file, "\n");
12415 /* If the insn is not found, we will try a negated form and append
12416 a nott. */
12417 bool append_nott = false;
12419 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12420 have to capture its current state and restore it afterwards. */
12421 recog_data_d prev_recog_data = recog_data;
12423 if (negt_reg_operand (x, GET_MODE (x)))
12425 /* This is a normal movt followed by a nott. It will be converted
12426 into a movrt after initial expansion. */
12427 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12428 append_nott = true;
12430 else
12432 /* If the comparison op doesn't have a mode set, set it to SImode. */
12433 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12434 PUT_MODE (x, SImode);
12436 int insn_code = recog (PATTERN (i), i, 0);
12438 if (insn_code < 0 && COMPARISON_P (x))
12440 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12441 if (cmp_mode == VOIDmode)
12442 cmp_mode = GET_MODE (XEXP (x, 1));
12444 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12445 insn_code = recog (PATTERN (i), i, 0);
12446 append_nott = true;
12449 gcc_assert (insn_code >= 0);
12452 /* Try to recursively split the insn. Some insns might refuse to split
12453 any further while we are in the treg_set_expr splitting phase. They
12454 will be emitted as part of the outer insn and then split again. */
12455 std::pair<rtx_insn*, rtx_insn*> insnlist =
12456 sh_try_split_insn_simple (i, curr_insn);
12458 /* Restore recog state. */
12459 recog_data = prev_recog_data;
12461 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12462 ? insnlist.second
12463 : NULL;
12464 if (dump_file)
12466 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12467 print_rtl (dump_file, insnlist.first);
12468 fprintf (dump_file, "\n");
12470 if (nott_insn != NULL)
12471 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12474 emit_insn (insnlist.first);
12476 if (nott_insn != NULL && append_nott)
12478 if (dump_file)
12479 fprintf (dump_file, "removing trailing nott\n");
12480 remove_insn (nott_insn);
12481 nott_insn = NULL;
12482 append_nott = false;
12485 if (append_nott)
12486 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12488 rtx_insn* first_insn = get_insns ();
12490 if (dump_file)
12492 fprintf (dump_file, "resulting insns:\n");
12493 print_rtl (dump_file, first_insn);
12494 fprintf (dump_file, "\n");
12497 return sh_treg_insns (first_insn, nott_insn);
12500 /*------------------------------------------------------------------------------
12501 Mode switching support code.
12504 static void
12505 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12506 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12508 if ((TARGET_SH4A_FP || TARGET_SH4_300)
12509 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12511 emit_insn (gen_toggle_pr ());
12512 if (TARGET_FMOVD)
12513 emit_insn (gen_toggle_sz ());
12515 else if (mode != FP_MODE_NONE)
12517 rtx tmp = gen_reg_rtx (SImode);
12518 emit_insn (gen_sts_fpscr (tmp));
12519 rtx i = NULL;
12521 const unsigned HOST_WIDE_INT fpbits =
12522 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12524 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12525 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12526 else if (mode == FP_MODE_SINGLE)
12527 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12528 else if (mode == FP_MODE_DOUBLE)
12529 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12530 else
12531 gcc_unreachable ();
12533 emit_insn (i);
12534 emit_insn (gen_lds_fpscr (tmp));
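/* Editorial note (not part of the original sources): the mode switching
   above toggles the FPSCR.PR bit (single vs. double precision) and, with
   -mfmovd, also FPSCR.SZ (32 vs. 64 bit fmov transfer size).  SH4A can
   flip PR directly with the fpchg insn (gen_toggle_pr); on other cores
   the sequence reads FPSCR with sts, applies and/or/xor to the PR/SZ
   bits and writes the result back with lds, as emitted above.  */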
12538 static int
12539 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12541 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12544 static int
12545 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12547 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
12548 get_attr_fp_set (insn) != FP_SET_NONE)
12549 return (int) get_attr_fp_set (insn);
12550 else
12551 return mode;
12554 static int
12555 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12557 return NORMAL_MODE (entity);
12560 static int
12561 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12563 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12566 static int
12567 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12569 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12572 /*------------------------------------------------------------------------------
12573 Misc
12576 /* Return true if we use LRA instead of reload pass. */
12577 bool
12578 sh_lra_p (void)
12580 return sh_lra_flag;
12583 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12585 static bool
12586 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12587 unsigned int align,
12588 enum by_pieces_operation op,
12589 bool speed_p)
12591 switch (op)
12593 case MOVE_BY_PIECES:
12594 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12595 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12596 case STORE_BY_PIECES:
12597 case SET_BY_PIECES:
12598 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12599 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12600 default:
12601 return default_use_by_pieces_infrastructure_p (size, align,
12602 op, speed_p);
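/* Editorial note (not part of the original sources): with the thresholds
   above, the inline by-pieces expansion is used only when the estimated
   insn count stays below 2 when optimizing for size or when the data is
   not known to be at least 32-bit aligned, and below 16 for
   speed-optimized, word-aligned blocks; anything larger is left to the
   generic code or a library call.  */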
12606 bool
12607 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12608 rtx x ATTRIBUTE_UNUSED)
12610 return TARGET_FDPIC;
12613 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12614 function descriptor) into r1 and the GOT address into r12,
12615 returning an rtx for r1. */
12618 sh_load_function_descriptor (rtx funcdesc)
12620 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12621 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12622 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12623 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12625 emit_move_insn (r1, fnaddr);
12626 /* The ABI requires the entry point address to be loaded first, so
12627 prevent the load from being moved after that of the GOT
12628 address. */
12629 emit_insn (gen_blockage ());
12630 emit_move_insn (pic_reg, gotaddr);
12631 return r1;
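/* Editorial note (not part of the original sources): an FDPIC function
   descriptor is a pair of words, the entry point address at offset 0 and
   the callee's GOT address at offset 4, which is what the two loads above
   pick apart into r1 and the PIC register.  */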
12634 /* Return an rtx holding the initial value of the FDPIC register (the
12635 FDPIC pointer passed in from the caller). */
12638 sh_get_fdpic_reg_initial_val (void)
12640 return get_hard_reg_initial_val (Pmode, PIC_REG);
12643 #include "gt-sh.h"