Add PR number
[official-gcc.git] / gcc / config / sh / sh.c
blob4a0d5bae57bfa77875648533fef86bd67dd4a524
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2018 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
24 #define IN_TARGET_CODE 1
26 #include "config.h"
27 #define INCLUDE_VECTOR
28 #include "system.h"
29 #include "coretypes.h"
30 #include "backend.h"
31 #include "target.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "optabs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "flags.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "dwarf2.h"
57 #include "langhooks.h"
58 #include "cfgrtl.h"
59 #include "intl.h"
60 #include "sched-int.h"
61 #include "gimplify.h"
62 #include "tm-constrs.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "builtins.h"
67 #include "rtl-iter.h"
68 #include "regs.h"
69 #include "toplev.h"
71 /* This file should be included last. */
72 #include "target-def.h"
/* Global copy of the insn code CODE_FOR_indirect_jump_scratch. */
74 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Local shorthands: CONST_OK_FOR_ADD forwards its argument to
   CONST_OK_FOR_I08, and GEN_MOV / GEN_ADD3 / GEN_SUB3 name the gen_movsi,
   gen_addsi3 and gen_subsi3 generator functions respectively. */
76 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
77 #define GEN_MOV (*(gen_movsi))
78 #define GEN_ADD3 (*(gen_addsi3))
79 #define GEN_SUB3 (*(gen_subsi3))
/* Used to simplify the logic below.  Find the attributes wherever
   they may be:
     - if DECL is a type, its TYPE_ATTRIBUTES;
     - otherwise DECL_ATTRIBUTES (DECL) when that is non-null;
     - otherwise the TYPE_ATTRIBUTES of DECL's type.
   DECL is evaluated more than once, so it must be free of side effects.
   The whole expansion is parenthesized so that the macro behaves as a
   single operand when combined with other operators.  */
#define SH_ATTRIBUTES(decl) \
  (TYPE_P (decl) ? TYPE_ATTRIBUTES (decl) \
   : DECL_ATTRIBUTES (decl) ? DECL_ATTRIBUTES (decl) \
   : TYPE_ATTRIBUTES (TREE_TYPE (decl)))
89 /* Set to true by expand_prologue() when the function is an
90 interrupt handler. */
91 bool current_function_interrupt;
/* Head of a tree list of deferred function attributes, and a pointer to the
   slot where the next element is to be stored; the tail initially points at
   the (empty) head. */
93 tree sh_deferred_function_attributes;
94 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
96 /* Global variables for machine-dependent things. */
98 /* Which cpu are we scheduling for. */
99 enum processor_type sh_cpu;
101 /* Definitions used in ready queue reordering for first scheduling pass. */
103 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
/* Slot 0 is used for SImode and slot 1 for any other mode, see
   INSN_REGMODE_WEIGHT.  NOTE(review): that macro indexes by INSN_UID, not
   LUID -- confirm which one the comment above should say. */
104 static short *regmode_weight[2];
106 /* Total SFmode and SImode weights of scheduled insns. */
/* Same slot convention as regmode_weight, see CURR_REGMODE_PRESSURE. */
107 static int curr_regmode_pressure[2];
109 /* Number of r0 life regions. */
110 static int r0_life_regions;
112 /* If true, skip cycles for Q -> R movement. */
113 static int skip_cycles = 0;
115 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
116 and returned from sh_reorder2. */
117 static short cached_can_issue_more;
119 /* Unique number for UNSPEC_BBR pattern. */
/* Starts counting at 1. */
120 static unsigned int unspec_bbr_uid = 1;
122 /* Provides the class number of the smallest class containing
123 reg number. */
/* One entry per hard register (FIRST_PSEUDO_REGISTER entries), listed in
   register-number order. */
124 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
/* 64 entries: R0_REGS for the first register, GENERAL_REGS for the rest. */
126 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
/* 64 entries: FP0_REGS for the first register of the run, FP_REGS for the
   rest. */
142 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
/* 8 TARGET_REGS entries followed by 8 DF_REGS entries. */
158 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
159 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
160 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
161 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
/* The final 10 entries each get an individual class; which registers they
   correspond to depends on the register numbering defined elsewhere. */
162 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
163 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
164 GENERAL_REGS, GENERAL_REGS,
/* Table of register name strings, one row of at most
   MAX_REGISTER_NAME_LENGTH characters plus terminator per hard register,
   initialized from SH_REGISTER_NAMES_INITIALIZER. */
167 char sh_register_names[FIRST_PSEUDO_REGISTER] \
168 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
/* Table of ADDREGNAMES_SIZE additional register name strings, initialized
   from SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER. */
170 char sh_additional_register_names[ADDREGNAMES_SIZE] \
171 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
172 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
/* Assembler dialect selector.  sh_option_override sets it to 0, or to 1
   when TARGET_SH4 or TARGET_SH4A is in effect. */
174 int assembler_dialect;
/* Forward declarations of static functions: branch and constant-table
   helpers, sh_reorg (installed as TARGET_MACHINE_DEPENDENT_REORG), the
   option-override hooks, stack/frame helpers and the functions installed
   as the TARGET_MODE_* hooks. */
176 static void split_branches (rtx_insn *);
177 static int branch_dest (rtx);
178 static void print_slot (rtx_sequence *);
179 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
180 static void dump_table (rtx_insn *, rtx_insn *);
181 static bool broken_move (rtx_insn *);
182 static bool mova_p (rtx_insn *);
183 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
184 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
185 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
186 static void sh_reorg (void);
187 static void sh_option_override (void);
188 static void sh_override_options_after_change (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
190 static rtx_insn* emit_frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
194 static int calc_live_regs (HARD_REG_SET *);
195 static HOST_WIDE_INT rounded_frame_size (int);
196 static bool sh_frame_pointer_required (void);
197 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
198 static int sh_mode_needed (int, rtx_insn *);
199 static int sh_mode_after (int, int, rtx_insn *);
200 static int sh_mode_entry (int);
201 static int sh_mode_exit (int);
202 static int sh_mode_priority (int entity, int n);
204 static rtx mark_constant_pool_use (rtx);
/* Forward declarations: the attribute handlers referenced from
   sh_attribute_table. */
205 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
206 int, bool *);
207 static tree sh_handle_resbank_handler_attribute (tree *, tree,
208 tree, int, bool *);
209 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
210 tree, int, bool *);
211 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
212 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
213 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
/* Forward declarations: functions installed as the operand-printing and
   assembly-output hooks, attribute insertion and PCH flag checking. */
214 static void sh_print_operand (FILE *, rtx, int);
215 static void sh_print_operand_address (FILE *, machine_mode, rtx);
216 static bool sh_print_operand_punct_valid_p (unsigned char code);
217 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
218 static void sh_output_function_epilogue (FILE *);
219 static void sh_insert_attributes (tree, tree *);
220 static const char *sh_check_pch_target_flags (int);
/* Forward declarations: cost and scheduler support.  The sh_* functions
   here are installed as TARGET_REGISTER_MOVE_COST and the TARGET_SCHED_*
   hooks; the remaining ones are helpers for register-pressure tracking. */
221 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
222 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
223 static int sh_issue_rate (void);
224 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
225 static short find_set_regmode_weight (rtx, machine_mode);
226 static short find_insn_regmode_weight (rtx, machine_mode);
227 static void find_regmode_weight (basic_block, machine_mode);
228 static int find_r0_life_regions (basic_block);
229 static void sh_md_init_global (FILE *, int, int);
230 static void sh_md_finish_global (FILE *, int);
231 static int rank_for_reorder (const void *, const void *);
232 static void swap_reorder (rtx_insn **, int);
233 static void ready_reorder (rtx_insn **, int);
234 static bool high_pressure (machine_mode);
235 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
236 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
237 static void sh_md_init (FILE *, int, int);
238 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
/* Forward declarations: sibcall, jump-following and bit-field layout
   predicates (installed as the corresponding TARGET_* hooks). */
240 static bool sh_function_ok_for_sibcall (tree, tree);
242 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
243 static bool sh_ms_bitfield_layout_p (const_tree);
/* Forward declarations: builtins, thunk output, file start and integer
   assembly. */
245 static void sh_init_builtins (void);
246 static tree sh_builtin_decl (unsigned, bool);
247 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
248 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
249 HOST_WIDE_INT, tree);
250 static void sh_file_start (void);
251 static bool sh_assemble_integer (rtx, unsigned int, int);
/* Forward declarations: dependency checks and RTX / address cost
   computation. */
252 static bool flow_dependent_p (rtx, rtx);
253 static void flow_dependent_p_1 (rtx, const_rtx, void *);
254 static int shiftcosts (rtx);
255 static int and_xor_ior_costs (rtx, int);
256 static int addsubcosts (rtx);
257 static int multcosts (rtx);
258 static bool unspec_caller_rtx_p (rtx);
259 static bool sh_cannot_copy_insn_p (rtx_insn *);
260 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
261 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
262 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
263 static int sh_pr_n_sets (void);
264 static rtx sh_allocate_initial_value (rtx);
/* Forward declarations: reload class selection and address
   legitimization. */
265 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
266 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
267 machine_mode,
268 struct secondary_reload_info *);
269 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
270 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
271 static rtx sh_delegitimize_address (rtx);
272 static bool sh_cannot_substitute_mem_equiv_p (rtx);
273 static bool sh_legitimize_address_displacement (rtx *, rtx *,
274 poly_int64, machine_mode);
275 static int scavenge_reg (HARD_REG_SET *s);
/* Forward declarations: function value return, varargs and argument
   passing (installed as the calling-convention TARGET_* hooks). */
277 static rtx sh_struct_value_rtx (tree, int);
278 static rtx sh_function_value (const_tree, const_tree, bool);
279 static bool sh_function_value_regno_p (const unsigned int);
280 static rtx sh_libcall_value (machine_mode, const_rtx);
281 static bool sh_return_in_memory (const_tree, const_tree);
282 static rtx sh_builtin_saveregs (void);
283 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
284 tree, int *, int);
285 static bool sh_strict_argument_naming (cumulative_args_t);
286 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
287 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
288 static tree sh_build_builtin_va_list (void);
289 static void sh_va_start (tree, rtx);
290 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
291 static bool sh_promote_prototypes (const_tree);
292 static machine_mode sh_promote_function_mode (const_tree type,
293 machine_mode,
294 int *punsignedp,
295 const_tree funtype,
296 int for_return);
297 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
298 const_tree, bool);
299 static bool sh_callee_copies (cumulative_args_t, machine_mode,
300 const_tree, bool);
301 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
302 tree, bool);
303 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
304 const_tree, bool);
305 static rtx sh_function_arg (cumulative_args_t, machine_mode,
306 const_tree, bool);
/* Forward declarations: debug info, section info, trampolines, register
   usage, constants and move-insn size helpers. */
307 static int sh_dwarf_calling_convention (const_tree);
308 static void sh_encode_section_info (tree, rtx, int);
309 static bool sh2a_function_vector_p (tree);
310 static void sh_trampoline_init (rtx, tree, rtx);
311 static rtx sh_trampoline_adjust_address (rtx);
312 static void sh_conditional_register_usage (void);
313 static bool sh_legitimate_constant_p (machine_mode, rtx);
314 static int mov_insn_size (machine_mode, bool);
315 static int mov_insn_alignment_mask (machine_mode, bool);
316 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
317 unsigned int,
318 enum by_pieces_operation,
319 bool);
320 static bool sequence_insn_p (rtx_insn *);
/* sh_canonicalize_comparison is overloaded: the (int *, rtx *, rtx *, bool)
   form matches the signature used for TARGET_CANONICALIZE_COMPARISON, the
   other takes its operands by reference plus a machine mode. */
321 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
322 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
323 machine_mode, bool);
324 static bool sh_legitimate_combined_insn (rtx_insn* insn);
326 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
/* Marked ATTRIBUTE_UNUSED, so it may legitimately go unreferenced in some
   configurations. */
328 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
/* Forward declarations: hard register / machine mode queries. */
329 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
330 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
331 static bool sh_modes_tieable_p (machine_mode, machine_mode);
332 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
/* Table of the SH-specific attributes, installed as TARGET_ATTRIBUTE_TABLE.
   "interrupt_handler", "trapa_handler" and "nosave_low_regs" share one
   handler.  "sp_switch", "trap_exit" and "function_vector" take exactly one
   argument (min_len == max_len == 1); the others take none.  The table is
   terminated by an entry whose name is NULL. */
334 static const struct attribute_spec sh_attribute_table[] =
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 { "interrupt_handler", 0, 0, true, false, false, false,
339 sh_handle_interrupt_handler_attribute, NULL },
340 { "sp_switch", 1, 1, true, false, false, false,
341 sh_handle_sp_switch_attribute, NULL },
342 { "trap_exit", 1, 1, true, false, false, false,
343 sh_handle_trap_exit_attribute, NULL },
/* "renesas" is the only entry that requires a type rather than a decl. */
344 { "renesas", 0, 0, false, true, false, false,
345 sh_handle_renesas_attribute, NULL },
346 { "trapa_handler", 0, 0, true, false, false, false,
347 sh_handle_interrupt_handler_attribute, NULL },
348 { "nosave_low_regs", 0, 0, true, false, false, false,
349 sh_handle_interrupt_handler_attribute, NULL },
350 { "resbank", 0, 0, true, false, false, false,
351 sh_handle_resbank_handler_attribute, NULL },
352 { "function_vector", 1, 1, true, false, false, false,
353 sh2a_handle_function_vector_handler_attribute, NULL },
354 { NULL, 0, 0, false, false, false, false, NULL, NULL }
/* Each hook is overridden by #undef-ing the default TARGET_* macro and
   redefining it to the SH implementation; the resulting macro values are
   consumed by TARGET_INITIALIZER when targetm is defined further down. */
357 /* Initialize the GCC target structure. */
358 #undef TARGET_ATTRIBUTE_TABLE
359 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
361 /* The next two are used for debug info when compiling with -gdwarf. */
362 #undef TARGET_ASM_UNALIGNED_HI_OP
363 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
364 #undef TARGET_ASM_UNALIGNED_SI_OP
365 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
/* Option processing. */
367 #undef TARGET_OPTION_OVERRIDE
368 #define TARGET_OPTION_OVERRIDE sh_option_override
370 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
371 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
372 sh_override_options_after_change
/* Operand printing and assembly output. */
374 #undef TARGET_PRINT_OPERAND
375 #define TARGET_PRINT_OPERAND sh_print_operand
376 #undef TARGET_PRINT_OPERAND_ADDRESS
377 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
380 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
381 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
383 #undef TARGET_ASM_FUNCTION_EPILOGUE
384 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
386 #undef TARGET_ASM_OUTPUT_MI_THUNK
387 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
/* Uses a generic always-true predicate rather than an SH-specific one. */
389 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
390 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
391 hook_bool_const_tree_hwi_hwi_const_tree_true
393 #undef TARGET_ASM_FILE_START
394 #define TARGET_ASM_FILE_START sh_file_start
395 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
396 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
398 #undef TARGET_ASM_INTEGER
399 #define TARGET_ASM_INTEGER sh_assemble_integer
/* Costs, attribute insertion and basic scheduler parameters. */
401 #undef TARGET_REGISTER_MOVE_COST
402 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
404 #undef TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
407 #undef TARGET_SCHED_ADJUST_COST
408 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
410 #undef TARGET_SCHED_ISSUE_RATE
411 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* Scheduler hooks used to limit register pressure in sched1.
   NOTE(review): the comment below says "5 hooks" but describes and installs
   seven -- confirm and update the count. */
413 /* The next 5 hooks have been implemented for reenabling sched1. With the
414 help of these macros we are limiting the movement of insns in sched1 to
415 reduce the register pressure. The overall idea is to keep count of SImode
416 and SFmode regs required by already scheduled insns. When these counts
417 cross some threshold values; give priority to insns that free registers.
418 The insn that frees registers is most likely to be the insn with lowest
419 LUID (original insn order); but such an insn might be there in the stalled
420 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
421 up to a max of 8 cycles so that such insns may move from Q -> R.
423 The description of the hooks are as below:
425 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
426 scheduler; it is called inside the sched_init function just after
427 find_insn_reg_weights function call. It is used to calculate the SImode
428 and SFmode weights of insns of basic blocks; much similar to what
429 find_insn_reg_weights does.
430 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
432 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
433 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
434 (Q)->(R).
436 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
437 high; reorder the ready queue so that the insn with lowest LUID will be
438 issued next.
440 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
441 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
443 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
444 can be returned from TARGET_SCHED_REORDER2.
446 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
451 #undef TARGET_SCHED_INIT_GLOBAL
452 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
454 #undef TARGET_SCHED_FINISH_GLOBAL
455 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
457 #undef TARGET_SCHED_VARIABLE_ISSUE
458 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
460 #undef TARGET_SCHED_REORDER
461 #define TARGET_SCHED_REORDER sh_reorder
463 #undef TARGET_SCHED_REORDER2
464 #define TARGET_SCHED_REORDER2 sh_reorder2
466 #undef TARGET_SCHED_INIT
467 #define TARGET_SCHED_INIT sh_md_init
/* Address handling, jumps, bit-field layout. */
469 #undef TARGET_DELEGITIMIZE_ADDRESS
470 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
472 #undef TARGET_LEGITIMIZE_ADDRESS
473 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
475 #undef TARGET_CAN_FOLLOW_JUMP
476 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
478 #undef TARGET_MS_BITFIELD_LAYOUT_P
479 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
/* Builtins. */
481 #undef TARGET_INIT_BUILTINS
482 #define TARGET_INIT_BUILTINS sh_init_builtins
483 #undef TARGET_BUILTIN_DECL
484 #define TARGET_BUILTIN_DECL sh_builtin_decl
485 #undef TARGET_EXPAND_BUILTIN
486 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
489 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
/* Insn copying, costs and initial values. */
491 #undef TARGET_CANNOT_COPY_INSN_P
492 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
493 #undef TARGET_RTX_COSTS
494 #define TARGET_RTX_COSTS sh_rtx_costs
495 #undef TARGET_ADDRESS_COST
496 #define TARGET_ADDRESS_COST sh_address_cost
497 #undef TARGET_ALLOCATE_INITIAL_VALUE
498 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
500 #undef TARGET_MACHINE_DEPENDENT_REORG
501 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
/* sh_dwarf_register_span has no static prototype in this file's list above;
   presumably it is declared in a header -- verify. */
503 #undef TARGET_DWARF_REGISTER_SPAN
504 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
/* TLS support is advertised only when HAVE_AS_TLS is defined. */
506 #ifdef HAVE_AS_TLS
507 #undef TARGET_HAVE_TLS
508 #define TARGET_HAVE_TLS true
509 #endif
/* Calling convention: promotion, return values, varargs and argument
   passing. */
511 #undef TARGET_PROMOTE_PROTOTYPES
512 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
513 #undef TARGET_PROMOTE_FUNCTION_MODE
514 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
516 #undef TARGET_FUNCTION_VALUE
517 #define TARGET_FUNCTION_VALUE sh_function_value
518 #undef TARGET_FUNCTION_VALUE_REGNO_P
519 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
520 #undef TARGET_LIBCALL_VALUE
521 #define TARGET_LIBCALL_VALUE sh_libcall_value
522 #undef TARGET_STRUCT_VALUE_RTX
523 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
524 #undef TARGET_RETURN_IN_MEMORY
525 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
527 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
528 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
529 #undef TARGET_SETUP_INCOMING_VARARGS
530 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
531 #undef TARGET_STRICT_ARGUMENT_NAMING
532 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
533 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
534 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
/* Uses the generic must_pass_in_stack_var_size rather than an SH-specific
   function. */
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
537 #undef TARGET_PASS_BY_REFERENCE
538 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
539 #undef TARGET_CALLEE_COPIES
540 #define TARGET_CALLEE_COPIES sh_callee_copies
541 #undef TARGET_ARG_PARTIAL_BYTES
542 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
543 #undef TARGET_FUNCTION_ARG
544 #define TARGET_FUNCTION_ARG sh_function_arg
545 #undef TARGET_FUNCTION_ARG_ADVANCE
546 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
548 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
549 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
/* va_list support. */
551 #undef TARGET_BUILD_BUILTIN_VA_LIST
552 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
553 #undef TARGET_EXPAND_BUILTIN_VA_START
554 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
555 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
556 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
/* sh_vector_mode_supported_p has no static prototype in this file's list
   above; presumably it is declared in a header -- verify. */
558 #undef TARGET_VECTOR_MODE_SUPPORTED_P
559 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
561 #undef TARGET_CHECK_PCH_TARGET_FLAGS
562 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
564 #undef TARGET_DWARF_CALLING_CONVENTION
565 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
567 #undef TARGET_FRAME_POINTER_REQUIRED
568 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
/* Mode-switching hooks. */
570 #undef TARGET_MODE_EMIT
571 #define TARGET_MODE_EMIT sh_emit_mode_set
573 #undef TARGET_MODE_NEEDED
574 #define TARGET_MODE_NEEDED sh_mode_needed
576 #undef TARGET_MODE_AFTER
577 #define TARGET_MODE_AFTER sh_mode_after
579 #undef TARGET_MODE_ENTRY
580 #define TARGET_MODE_ENTRY sh_mode_entry
582 #undef TARGET_MODE_EXIT
583 #define TARGET_MODE_EXIT sh_mode_exit
585 #undef TARGET_MODE_PRIORITY
586 #define TARGET_MODE_PRIORITY sh_mode_priority
588 /* Return regmode weight for insn. */
/* Expands to an element of regmode_weight: row 0 when MODE is SImode, row 1
   for every other mode, indexed by INSN_UID (INSN).  The expansion is an
   lvalue, so it can be assigned to. */
589 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
590 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
592 /* Return current register pressure for regmode. */
/* Expands to curr_regmode_pressure[0] when MODE is SImode and to
   curr_regmode_pressure[1] for every other mode; also an lvalue. */
593 #define CURR_REGMODE_PRESSURE(MODE)\
594 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
/* Remaining target hook overrides: section info, register allocation,
   addresses, trampolines, constants, comparisons and register/mode
   queries. */
596 #undef TARGET_ENCODE_SECTION_INFO
597 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
/* sh_lra_p has no static prototype in this file's list above; presumably it
   is declared in a header -- verify. */
599 #undef TARGET_LRA_P
600 #define TARGET_LRA_P sh_lra_p
602 #undef TARGET_SECONDARY_RELOAD
603 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
605 #undef TARGET_PREFERRED_RELOAD_CLASS
606 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
608 #undef TARGET_CONDITIONAL_REGISTER_USAGE
609 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
611 #undef TARGET_LEGITIMATE_ADDRESS_P
612 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
614 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
615 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
617 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
618 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
619 sh_legitimize_address_displacement
621 #undef TARGET_TRAMPOLINE_INIT
622 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
623 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
624 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
626 #undef TARGET_LEGITIMATE_CONSTANT_P
627 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
629 #undef TARGET_CANONICALIZE_COMPARISON
630 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
632 #undef TARGET_LEGITIMATE_COMBINED_INSN
633 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
635 #undef TARGET_FIXED_CONDITION_CODE_REGS
636 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
638 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
639 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
640 sh_use_by_pieces_infrastructure_p
642 /* Machine-specific symbol_ref flags. */
/* Uses the first machine-dependent flag bit (shift count 0). */
643 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
645 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
646 is used by optabs.c atomic op expansion code as well as in sync.md. */
647 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
648 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
650 #undef TARGET_CANNOT_FORCE_CONST_MEM
651 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
653 #undef TARGET_HARD_REGNO_NREGS
654 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
655 #undef TARGET_HARD_REGNO_MODE_OK
656 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
658 #undef TARGET_MODES_TIEABLE_P
659 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
661 #undef TARGET_CAN_CHANGE_MODE_CLASS
662 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
/* Uses the generic constant_alignment_word_strings rather than an
   SH-specific function. */
664 #undef TARGET_CONSTANT_ALIGNMENT
665 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
/* The target hook structure itself, built by TARGET_INITIALIZER.  It must
   come after all of the TARGET_* overrides above so that they are picked
   up. */
667 struct gcc_target targetm = TARGET_INITIALIZER;
670 /* Information on the currently selected atomic model.
671 This is initialized in sh_option_override. */
672 static sh_atomic_model selected_atomic_model_;
674 const sh_atomic_model&
675 selected_atomic_model (void)
677 return selected_atomic_model_;
680 static sh_atomic_model
681 parse_validate_atomic_model_option (const char* str)
683 const char* model_names[sh_atomic_model::num_models];
684 model_names[sh_atomic_model::none] = "none";
685 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
686 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
687 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
688 model_names[sh_atomic_model::soft_imask] = "soft-imask";
690 const char* model_cdef_names[sh_atomic_model::num_models];
691 model_cdef_names[sh_atomic_model::none] = "NONE";
692 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
693 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
694 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
695 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
697 sh_atomic_model ret;
698 ret.type = sh_atomic_model::none;
699 ret.name = model_names[sh_atomic_model::none];
700 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
701 ret.strict = false;
702 ret.tcb_gbr_offset = -1;
704 /* Handle empty string as 'none'. */
705 if (str == NULL || *str == '\0')
706 return ret;
708 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
710 std::vector<std::string> tokens;
711 for (std::stringstream ss (str); ss.good (); )
713 tokens.push_back (std::string ());
714 std::getline (ss, tokens.back (), ',');
717 if (tokens.empty ())
718 err_ret ("invalid atomic model option");
720 /* The first token must be the atomic model name. */
722 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
723 if (tokens.front () == model_names[i])
725 ret.type = (sh_atomic_model::enum_type)i;
726 ret.name = model_names[i];
727 ret.cdef_name = model_cdef_names[i];
728 goto got_mode_name;
731 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
732 got_mode_name:;
735 /* Go through the remaining tokens. */
736 for (size_t i = 1; i < tokens.size (); ++i)
738 if (tokens[i] == "strict")
739 ret.strict = true;
740 else if (tokens[i].find ("gbr-offset=") == 0)
742 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
743 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
744 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
745 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
746 "option", offset_str.c_str ());
748 else
749 err_ret ("unknown parameter \"%s\" in atomic model option",
750 tokens[i].c_str ());
753 /* Check that the selection makes sense. */
754 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
755 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
756 ret.name);
758 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
759 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
761 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
762 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
764 if (ret.type == sh_atomic_model::soft_tcb
765 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
766 || (ret.tcb_gbr_offset & 3) != 0))
767 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
768 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
769 ret.name);
771 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
772 err_ret ("cannot use atomic model %s in user mode", ret.name);
774 return ret;
776 #undef err_ret
779 /* Register SH specific RTL passes. */
780 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
781 const char* name);
782 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
783 const char* name);
784 static void
785 register_sh_passes (void)
787 /* Running the sh_treg_combine pass after ce1 generates better code when
788 comparisons are combined and reg-reg moves are introduced, because
789 reg-reg moves will be eliminated afterwards. However, there are quite
790 some cases where combine will be unable to fold comparison related insns,
791 thus for now don't do it.
792 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
793 PASS_POS_INSERT_AFTER, "ce1", 1);
796 /* Run sh_treg_combine pass after combine but before register allocation. */
797 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
798 PASS_POS_INSERT_AFTER, "split1", 1);
800 /* Run sh_treg_combine pass after register allocation and basic block
801 reordering as this sometimes creates new opportunities. */
802 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
803 PASS_POS_INSERT_AFTER, "split4", 1);
805 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
806 is known after a conditional branch.
807 This must be done after basic blocks and branch conditions have
808 stabilized and won't be changed by further passes. */
809 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
810 PASS_POS_INSERT_BEFORE, "sched2", 1);
813 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
814 various options, and do some machine dependent initialization. */
815 static void
816 sh_option_override (void)
818 int regno;
820 SUBTARGET_OVERRIDE_OPTIONS;
822 sh_cpu = PROCESSOR_SH1;
823 assembler_dialect = 0;
824 if (TARGET_SH2)
825 sh_cpu = PROCESSOR_SH2;
826 if (TARGET_SH2E)
827 sh_cpu = PROCESSOR_SH2E;
828 if (TARGET_SH2A)
829 sh_cpu = PROCESSOR_SH2A;
830 if (TARGET_SH3)
831 sh_cpu = PROCESSOR_SH3;
832 if (TARGET_SH3E)
833 sh_cpu = PROCESSOR_SH3E;
834 if (TARGET_SH4)
836 assembler_dialect = 1;
837 sh_cpu = PROCESSOR_SH4;
839 if (TARGET_SH4A)
841 assembler_dialect = 1;
842 sh_cpu = PROCESSOR_SH4A;
845 /* User/priviledged mode is supported only on SH3* and SH4*.
846 Disable it for everything else. */
847 if (!TARGET_SH3 && TARGET_USERMODE)
848 TARGET_USERMODE = false;
850 if (! strcmp (sh_div_str, "call-div1"))
851 sh_div_strategy = SH_DIV_CALL_DIV1;
852 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
853 sh_div_strategy = SH_DIV_CALL_FP;
854 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
855 sh_div_strategy = SH_DIV_CALL_TABLE;
856 else
858 /* Pick one that makes most sense for the target in general.
859 It is not much good to use different functions depending on -Os,
860 since then we'll end up with two different functions when some of
861 the code is compiled for size, and some for speed. */
863 /* SH4 tends to emphasize speed. */
864 if (TARGET_HARD_SH4)
865 sh_div_strategy = SH_DIV_CALL_TABLE;
866 /* These have their own way of doing things. */
867 else if (TARGET_SH2A)
868 sh_div_strategy = SH_DIV_INTRINSIC;
869 /* SH1 .. SH3 cores often go into small-footprint systems, so
870 default to the smallest implementation available. */
871 else
872 sh_div_strategy = SH_DIV_CALL_DIV1;
875 if (sh_divsi3_libfunc[0])
876 ; /* User supplied - leave it alone. */
877 else if (TARGET_DIVIDE_CALL_FP)
878 sh_divsi3_libfunc = "__sdivsi3_i4";
879 else if (TARGET_DIVIDE_CALL_TABLE)
880 sh_divsi3_libfunc = "__sdivsi3_i4i";
881 else
882 sh_divsi3_libfunc = "__sdivsi3";
884 if (sh_branch_cost == -1)
886 /* The SH1 does not have delay slots, hence we get a pipeline stall
887 at every branch. The SH4 is superscalar, so the single delay slot
888 is not sufficient to keep both pipelines filled.
889 In any case, set the default branch cost to '2', as it results in
890 slightly overall smaller code and also enables some if conversions
891 that are required for matching special T bit related insns. */
892 sh_branch_cost = 2;
895 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
896 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
897 TARGET_ZDCBRANCH = 1;
899 /* FDPIC code is a special form of PIC, and the vast majority of code
900 generation constraints that apply to PIC also apply to FDPIC, so we
901 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
902 flag_pic is checked. */
903 if (TARGET_FDPIC && !flag_pic)
904 flag_pic = 2;
906 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
907 if (! VALID_REGISTER_P (regno))
908 sh_register_names[regno][0] = '\0';
910 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
911 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
912 sh_additional_register_names[regno][0] = '\0';
914 if (flag_pic && ! TARGET_PREFERGOT)
915 flag_no_function_cse = 1;
917 if (targetm.small_register_classes_for_mode_p (VOIDmode))
919 /* Never run scheduling before reload, since that can
920 break global alloc, and generates slower code anyway due
921 to the pressure on R0. */
922 /* Enable sched1 for SH4 if the user explicitly requests.
923 When sched1 is enabled, the ready queue will be reordered by
924 the target hooks if pressure is high. We can not do this for
925 PIC, SH3 and lower as they give spill failures for R0. */
926 if (!TARGET_HARD_SH4 || flag_pic)
927 flag_schedule_insns = 0;
928 /* ??? Current exception handling places basic block boundaries
929 after call_insns. It causes the high pressure on R0 and gives
930 spill failures for R0 in reload. See PR 22553 and the thread
931 on gcc-patches
932 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
933 else if (flag_exceptions)
935 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
936 warning (0, "ignoring -fschedule-insns because of exception "
937 "handling bug");
938 flag_schedule_insns = 0;
940 else if (flag_schedule_insns
941 && !global_options_set.x_flag_schedule_insns)
942 flag_schedule_insns = 0;
945 /* Unwind info is not correct around the CFG unless either a frame
946 pointer is present or M_A_O_A is set. Fixing this requires rewriting
947 unwind info generation to be aware of the CFG and propagating states
948 around edges. */
949 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
950 || flag_exceptions || flag_non_call_exceptions)
951 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
953 warning (0, "unwind tables currently require either a frame pointer "
954 "or -maccumulate-outgoing-args for correctness");
955 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
958 if (flag_unsafe_math_optimizations)
960 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
961 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
962 TARGET_FSCA = 1;
964 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
965 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
966 TARGET_FSRRA = 1;
969 /* Allow fsrra insn only if -funsafe-math-optimizations and
970 -ffinite-math-only is enabled. */
971 TARGET_FSRRA = TARGET_FSRRA
972 && flag_unsafe_math_optimizations
973 && flag_finite_math_only;
975 /* If the -mieee option was not explicitly set by the user, turn it on
976 unless -ffinite-math-only was specified. See also PR 33135. */
977 if (! global_options_set.x_TARGET_IEEE)
978 TARGET_IEEE = ! flag_finite_math_only;
980 if (sh_fixed_range_str)
981 sh_fix_range (sh_fixed_range_str);
983 /* This target defaults to strict volatile bitfields. */
984 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
985 flag_strict_volatile_bitfields = 1;
987 sh_override_options_after_change ();
989 /* Parse atomic model option and make sure it is valid for the current
990 target CPU. */
991 selected_atomic_model_
992 = parse_validate_atomic_model_option (sh_atomic_model_str);
994 register_sh_passes ();
997 /* Implement targetm.override_options_after_change. */
999 static void
1000 sh_override_options_after_change (void)
1002 /* Adjust loop, jump and function alignment values (in bytes), if those
1003 were not specified by the user using -falign-loops, -falign-jumps
1004 and -falign-functions options.
1005 32 bit alignment is better for speed, because instructions can be
1006 fetched as a pair from a longword boundary. For size use 16 bit
1007 alignment to get more compact code.
1008 Aligning all jumps increases the code size, even if it might
1009 result in slightly faster code. Thus, it is set to the smallest
1010 alignment possible if not specified by the user. */
1011 if (flag_align_loops && !str_align_loops)
1012 str_align_loops = optimize_size ? "2" : "4";
1014 /* Parse values so that we can compare for current value. */
1015 parse_alignment_opts ();
1016 if (flag_align_jumps && !str_align_jumps)
1017 str_align_jumps = "2";
1018 else if (align_jumps.levels[0].get_value () < 2)
1019 str_align_jumps = "2";
1021 if (flag_align_functions && !str_align_functions)
1022 str_align_functions = optimize_size ? "2" : "4";
1024 /* The linker relaxation code breaks when a function contains
1025 alignments that are larger than that at the start of a
1026 compilation unit. */
1027 if (TARGET_RELAX)
1029 /* Parse values so that we can compare for current value. */
1030 parse_alignment_opts ();
1031 int min_align = MAX (align_loops.levels[0].get_value (),
1032 align_jumps.levels[0].get_value ());
1034 /* Also take possible .long constants / mova tables into account. */
1035 if (min_align < 4)
1036 min_align = 4;
1037 if (align_functions.levels[0].get_value () < min_align)
1039 char *r = XNEWVEC (char, 16);
1040 sprintf (r, "%d", min_align);
1041 str_align_functions = r;
1046 /* Print the operand address in x to the stream. */
1047 static void
1048 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1050 switch (GET_CODE (x))
1052 case REG:
1053 case SUBREG:
1054 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1055 break;
1057 case PLUS:
1059 rtx base = XEXP (x, 0);
1060 rtx index = XEXP (x, 1);
1062 switch (GET_CODE (index))
1064 case CONST_INT:
1065 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1066 reg_names[true_regnum (base)]);
1067 break;
1069 case REG:
1070 case SUBREG:
1072 int base_num = true_regnum (base);
1073 int index_num = true_regnum (index);
1075 /* If base or index is R0, make sure that it comes first.
1076 Usually one of them will be R0, but the order might be wrong.
1077 If neither base nor index are R0 it's an error and we just
1078 pass it on to the assembler. This avoids silent wrong code
1079 bugs. */
1080 if (base_num == 0 && index_num != 0)
1081 std::swap (base_num, index_num);
1083 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1084 reg_names[base_num]);
1085 break;
1088 default:
1089 gcc_unreachable ();
1092 break;
1094 case PRE_DEC:
1095 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1096 break;
1098 case POST_INC:
1099 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1100 break;
1102 default:
1103 x = mark_constant_pool_use (x);
1104 output_addr_const (stream, x);
1105 break;
1109 /* Print operand x (an rtx) in assembler syntax to file stream
1110 according to modifier code.
1112 '.' print a .s if insn needs delay slot
1113 ',' print LOCAL_LABEL_PREFIX
1114 '@' print trap, rte or rts depending upon pragma interruptness
1115 '#' output a nop if there is nothing to put in the delay slot
1116 ''' print likelihood suffix (/u for unlikely).
1117 '>' print branch target if -fverbose-asm
1118 'O' print a constant without the #
1119 'R' print the LSW of a dp value - changes if in little endian
1120 'S' print the MSW of a dp value - changes if in little endian
1121 'T' print the next word of a dp value - same as 'R' in big endian mode.
1122 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1123 'N' print 'r63' if the operand is (const_int 0).
1124 'd' print a V2SF reg as dN instead of fpN.
1125 'm' print a pair `base,offset' or `base,index', for LD and ST.
1126 'U' Likewise for {LD,ST}{HI,LO}.
1127 'V' print the position of a single bit set.
1128 'W' print the position of a single bit cleared.
1129 't' print a memory address which is a register.
1130 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1131 'o' output an operator. */
1132 static void
1133 sh_print_operand (FILE *stream, rtx x, int code)
1135 int regno;
1136 machine_mode mode;
1138 switch (code)
1140 tree trapa_attr;
1142 case '.':
1143 if (final_sequence
1144 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1145 && get_attr_length (final_sequence->insn (1)))
1146 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1147 break;
1148 case ',':
1149 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1150 break;
1151 case '@':
1152 trapa_attr = lookup_attribute ("trap_exit",
1153 DECL_ATTRIBUTES (current_function_decl));
1154 if (trapa_attr)
1155 fprintf (stream, "trapa #%ld",
1156 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1157 else if (sh_cfun_interrupt_handler_p ())
1159 if (sh_cfun_resbank_handler_p ())
1160 fprintf (stream, "resbank\n");
1161 fprintf (stream, "rte");
1163 else
1164 fprintf (stream, "rts");
1165 break;
1166 case '#':
1167 /* Output a nop if there's nothing in the delay slot. */
1168 if (dbr_sequence_length () == 0)
1169 fprintf (stream, "\n\tnop");
1170 break;
1171 case '\'':
1173 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1175 if (note
1176 && profile_probability::from_reg_br_prob_note (XINT (note, 0))
1177 < profile_probability::even ())
1178 fputs ("/u", stream);
1179 break;
1181 case '>':
1182 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1184 fputs ("\t! target: ", stream);
1185 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1187 break;
1188 case 'O':
1189 x = mark_constant_pool_use (x);
1190 output_addr_const (stream, x);
1191 break;
1192 /* N.B.: %R / %S / %T adjust memory addresses by four.
1193 While they can be used to access 64 bit parts of a larger value
1194 held in general purpose registers, that won't work with memory -
1195 neither for fp registers, since the frxx names are used. */
1196 case 'R':
1197 if (REG_P (x) || GET_CODE (x) == SUBREG)
1199 regno = true_regnum (x);
1200 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1201 fputs (reg_names[regno], (stream));
1203 else if (MEM_P (x))
1205 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1206 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1208 else
1210 rtx sub = NULL_RTX;
1212 mode = GET_MODE (x);
1213 if (mode == VOIDmode)
1214 mode = DImode;
1215 if (GET_MODE_SIZE (mode) >= 8)
1216 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1217 if (sub)
1218 sh_print_operand (stream, sub, 0);
1219 else
1220 output_operand_lossage ("invalid operand to %%R");
1222 break;
1223 case 'S':
1224 if (REG_P (x) || GET_CODE (x) == SUBREG)
1226 regno = true_regnum (x);
1227 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1228 fputs (reg_names[regno], (stream));
1230 else if (MEM_P (x))
1232 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1233 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1235 else
1237 rtx sub = NULL_RTX;
1239 mode = GET_MODE (x);
1240 if (mode == VOIDmode)
1241 mode = DImode;
1242 if (GET_MODE_SIZE (mode) >= 8)
1243 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1244 if (sub)
1245 sh_print_operand (stream, sub, 0);
1246 else
1247 output_operand_lossage ("invalid operand to %%S");
1249 break;
1250 case 'T':
1251 /* Next word of a double. */
1252 switch (GET_CODE (x))
1254 case REG:
1255 fputs (reg_names[REGNO (x) + 1], (stream));
1256 break;
1257 case MEM:
1259 machine_mode mode = GET_MODE (x);
1260 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1261 && GET_CODE (XEXP (x, 0)) != POST_INC)
1262 x = adjust_address (x, SImode, 4);
1263 sh_print_operand_address (stream, mode, XEXP (x, 0));
1265 break;
1266 default:
1267 break;
1269 break;
1271 case 't':
1272 gcc_assert (MEM_P (x));
1273 x = XEXP (x, 0);
1274 switch (GET_CODE (x))
1276 case REG:
1277 case SUBREG:
1278 sh_print_operand (stream, x, 0);
1279 break;
1280 default:
1281 break;
1283 break;
1285 case 'o':
1286 switch (GET_CODE (x))
1288 case PLUS: fputs ("add", stream); break;
1289 case MINUS: fputs ("sub", stream); break;
1290 case MULT: fputs ("mul", stream); break;
1291 case DIV: fputs ("div", stream); break;
1292 case EQ: fputs ("eq", stream); break;
1293 case NE: fputs ("ne", stream); break;
1294 case GT: case LT: fputs ("gt", stream); break;
1295 case GE: case LE: fputs ("ge", stream); break;
1296 case GTU: case LTU: fputs ("gtu", stream); break;
1297 case GEU: case LEU: fputs ("geu", stream); break;
1298 default:
1299 break;
1301 break;
1302 case 'M':
1303 if (MEM_P (x))
1305 switch (GET_MODE (x))
1307 case E_QImode: fputs (".b", stream); break;
1308 case E_HImode: fputs (".w", stream); break;
1309 case E_SImode: fputs (".l", stream); break;
1310 case E_SFmode: fputs (".s", stream); break;
1311 case E_DFmode: fputs (".d", stream); break;
1312 default: gcc_unreachable ();
1315 break;
1317 case 'm':
1318 gcc_assert (MEM_P (x));
1319 x = XEXP (x, 0);
1320 /* Fall through. */
1321 case 'U':
1322 switch (GET_CODE (x))
1324 case REG:
1325 case SUBREG:
1326 sh_print_operand (stream, x, 0);
1327 fputs (", 0", stream);
1328 break;
1330 case PLUS:
1331 sh_print_operand (stream, XEXP (x, 0), 0);
1332 fputs (", ", stream);
1333 sh_print_operand (stream, XEXP (x, 1), 0);
1334 break;
1336 default:
1337 gcc_unreachable ();
1339 break;
1341 case 'V':
1343 int num = exact_log2 (INTVAL (x));
1344 gcc_assert (num >= 0);
1345 fprintf (stream, "#%d", num);
1347 break;
1349 case 'W':
1351 int num = exact_log2 (~INTVAL (x));
1352 gcc_assert (num >= 0);
1353 fprintf (stream, "#%d", num);
1355 break;
1357 case 'd':
1358 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1360 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1361 break;
1363 case 'N':
1364 if (x == CONST0_RTX (GET_MODE (x)))
1366 fprintf ((stream), "r63");
1367 break;
1369 goto default_output;
1370 case 'u':
1371 if (CONST_INT_P (x))
1373 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1374 break;
1376 /* Fall through. */
1378 default_output:
1379 default:
1380 regno = 0;
1381 mode = GET_MODE (x);
1383 switch (GET_CODE (x))
1385 case TRUNCATE:
1387 rtx inner = XEXP (x, 0);
1388 int offset = 0;
1389 machine_mode inner_mode;
1391 /* We might see SUBREGs with vector mode registers inside. */
1392 if (GET_CODE (inner) == SUBREG
1393 && (GET_MODE_SIZE (GET_MODE (inner))
1394 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1395 && subreg_lowpart_p (inner))
1396 inner = SUBREG_REG (inner);
1397 if (CONST_INT_P (inner))
1399 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1400 goto default_output;
1402 inner_mode = GET_MODE (inner);
1403 if (GET_CODE (inner) == SUBREG
1404 && (GET_MODE_SIZE (GET_MODE (inner))
1405 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1406 && REG_P (SUBREG_REG (inner)))
1408 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1409 GET_MODE (SUBREG_REG (inner)),
1410 SUBREG_BYTE (inner),
1411 GET_MODE (inner));
1412 inner = SUBREG_REG (inner);
1414 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1415 abort ();
1416 /* Floating point register pairs are always big endian;
1417 general purpose registers are 64 bit wide. */
1418 regno = REGNO (inner);
1419 regno = (hard_regno_nregs (regno, inner_mode)
1420 - hard_regno_nregs (regno, mode))
1421 + offset;
1422 x = inner;
1423 goto reg;
1425 case SIGN_EXTEND:
1426 x = XEXP (x, 0);
1427 goto reg;
1428 case SUBREG:
1429 gcc_assert (SUBREG_BYTE (x) == 0
1430 && REG_P (SUBREG_REG (x)));
1432 x = SUBREG_REG (x);
1433 /* Fall through. */
1435 reg:
1436 case REG:
1437 regno += REGNO (x);
1438 if (FP_REGISTER_P (regno)
1439 && mode == V16SFmode)
1440 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1441 else if (FP_REGISTER_P (REGNO (x))
1442 && mode == V4SFmode)
1443 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1444 else if (REG_P (x)
1445 && mode == V2SFmode)
1446 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1447 else if (FP_REGISTER_P (REGNO (x))
1448 && GET_MODE_SIZE (mode) > 4)
1449 fprintf ((stream), "d%s", reg_names[regno] + 1);
1450 else
1451 fputs (reg_names[regno], (stream));
1452 break;
1454 case MEM:
1455 output_address (GET_MODE (x), XEXP (x, 0));
1456 break;
1458 default:
1459 fputc ('#', stream);
1460 output_addr_const (stream, x);
1461 break;
1463 break;
/* Return true if CODE is one of the punctuation characters
   '.', '#', '@', ',', '$', '\'' or '>'.  */
static bool
sh_print_operand_punct_valid_p (unsigned char code)
{
  return (code == '.' || code == '#' || code == '@' || code == ','
          || code == '$' || code == '\'' || code == '>');
}
1474 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1475 static bool
1476 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1478 if (GET_CODE (x) == UNSPEC)
1480 switch (XINT (x, 1))
1482 case UNSPEC_PIC:
1483 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1484 output_addr_const (file, XVECEXP (x, 0, 0));
1485 break;
1486 case UNSPEC_GOT:
1487 output_addr_const (file, XVECEXP (x, 0, 0));
1488 fputs ("@GOT", file);
1489 break;
1490 case UNSPEC_GOTOFF:
1491 output_addr_const (file, XVECEXP (x, 0, 0));
1492 fputs ("@GOTOFF", file);
1493 break;
1494 case UNSPEC_PLT:
1495 output_addr_const (file, XVECEXP (x, 0, 0));
1496 fputs ("@PLT", file);
1497 break;
1498 case UNSPEC_GOTPLT:
1499 output_addr_const (file, XVECEXP (x, 0, 0));
1500 fputs ("@GOTPLT", file);
1501 break;
1502 case UNSPEC_PCREL:
1503 output_addr_const (file, XVECEXP (x, 0, 0));
1504 fputs ("@PCREL", file);
1505 break;
1506 case UNSPEC_DTPOFF:
1507 output_addr_const (file, XVECEXP (x, 0, 0));
1508 fputs ("@DTPOFF", file);
1509 break;
1510 case UNSPEC_GOTTPOFF:
1511 output_addr_const (file, XVECEXP (x, 0, 0));
1512 fputs ("@GOTTPOFF", file);
1513 break;
1514 case UNSPEC_TPOFF:
1515 output_addr_const (file, XVECEXP (x, 0, 0));
1516 fputs ("@TPOFF", file);
1517 break;
1518 case UNSPEC_CALLER:
1520 char name[32];
1521 /* LPCS stands for Label for PIC Call Site. */
1522 targetm.asm_out.generate_internal_label (name, "LPCS",
1523 INTVAL (XVECEXP (x, 0, 0)));
1524 assemble_name (file, name);
1526 break;
1527 case UNSPEC_SYMOFF:
1528 output_addr_const (file, XVECEXP (x, 0, 0));
1529 fputc ('-', file);
1530 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1532 fputc ('(', file);
1533 output_addr_const (file, XVECEXP (x, 0, 1));
1534 fputc (')', file);
1536 else
1537 output_addr_const (file, XVECEXP (x, 0, 1));
1538 break;
1539 case UNSPEC_PCREL_SYMOFF:
1540 output_addr_const (file, XVECEXP (x, 0, 0));
1541 fputs ("-(", file);
1542 output_addr_const (file, XVECEXP (x, 0, 1));
1543 fputs ("-.)", file);
1544 break;
1545 case UNSPEC_GOTFUNCDESC:
1546 output_addr_const (file, XVECEXP (x, 0, 0));
1547 fputs ("@GOTFUNCDESC", file);
1548 break;
1549 case UNSPEC_GOTOFFFUNCDESC:
1550 output_addr_const (file, XVECEXP (x, 0, 0));
1551 fputs ("@GOTOFFFUNCDESC", file);
1552 break;
1553 default:
1554 return false;
1556 return true;
1558 else
1559 return false;
1562 /* Encode symbol attributes of a SYMBOL_REF into its
1563 SYMBOL_REF_FLAGS. */
1564 static void
1565 sh_encode_section_info (tree decl, rtx rtl, int first)
1567 default_encode_section_info (decl, rtl, first);
1569 if (TREE_CODE (decl) == FUNCTION_DECL
1570 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1571 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1574 /* Prepare operands for a move define_expand; specifically, one of the
1575 operands must be in a register. */
1576 void
1577 prepare_move_operands (rtx operands[], machine_mode mode)
1579 if ((mode == SImode || mode == DImode)
1580 && flag_pic
1581 && ! ((mode == Pmode || mode == ptr_mode)
1582 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1584 rtx temp;
1585 if (SYMBOLIC_CONST_P (operands[1]))
1587 if (MEM_P (operands[0]))
1588 operands[1] = force_reg (Pmode, operands[1]);
1589 else
1591 temp = (!can_create_pseudo_p ()
1592 ? operands[0]
1593 : gen_reg_rtx (Pmode));
1594 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1597 else if (GET_CODE (operands[1]) == CONST
1598 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1599 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1601 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1602 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1603 mode, temp);
1604 operands[1] = expand_binop (mode, add_optab, temp,
1605 XEXP (XEXP (operands[1], 0), 1),
1606 (!can_create_pseudo_p ()
1607 ? temp
1608 : gen_reg_rtx (Pmode)),
1609 0, OPTAB_LIB_WIDEN);
1613 if (! reload_in_progress && ! reload_completed)
1615 /* Copy the source to a register if both operands aren't registers. */
1616 if (! register_operand (operands[0], mode)
1617 && ! register_operand (operands[1], mode))
1618 operands[1] = copy_to_mode_reg (mode, operands[1]);
1620 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1622 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1623 except that we can't use that function because it is static. */
1624 rtx new_rtx = change_address (operands[0], mode, 0);
1625 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1626 operands[0] = new_rtx;
1629 /* This case can happen while generating code to move the result
1630 of a library call to the target. Reject `st r0,@(rX,rY)' because
1631 reload will fail to find a spill register for rX, since r0 is already
1632 being used for the source. */
1633 else if (refers_to_regno_p (R0_REG, operands[1])
1634 && MEM_P (operands[0])
1635 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1636 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1637 operands[1] = copy_to_mode_reg (mode, operands[1]);
1639 /* When the displacement addressing is used, RA will assign r0 to
1640 the pseudo register operand for the QI/HImode load/store.
1641 This tends to make a long live range for R0 and might cause
1642 anomalous register spills in some case with LRA. See PR
1643 target/55212.
1644 We split possible load/store to two move insns via r0 so as to
1645 shorten R0 live range. It will make some codes worse but will
1646 win on average for LRA.
1647 Also when base+index addressing is used and the index term is
1648 a subreg, LRA assumes that more hard registers can be available
1649 in some situation. It isn't the case for SH in the problematic
1650 case. We can pre-allocate R0 for that index term to avoid
1651 the issue. See PR target/66591. */
1652 else if (sh_lra_p ()
1653 && ! TARGET_SH2A
1654 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1655 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1657 bool load_p = REG_P (operands[0]);
1658 rtx reg = operands[load_p ? 0 : 1];
1659 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1661 if ((mode == QImode || mode == HImode)
1662 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1663 && GET_CODE (adr) == PLUS
1664 && REG_P (XEXP (adr, 0))
1665 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1666 && CONST_INT_P (XEXP (adr, 1))
1667 && INTVAL (XEXP (adr, 1)) != 0
1668 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1670 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1671 emit_move_insn (r0_rtx, operands[1]);
1672 operands[1] = r0_rtx;
1674 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1675 && GET_CODE (adr) == PLUS
1676 && REG_P (XEXP (adr, 0))
1677 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1678 && SUBREG_P (XEXP (adr, 1))
1679 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1681 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1682 emit_move_insn (r0_rtx, XEXP (adr, 1));
1683 XEXP (adr, 1) = r0_rtx;
1688 if (mode == Pmode || mode == ptr_mode)
1690 rtx op0 = operands[0];
1691 rtx op1 = operands[1];
1692 rtx opc;
1693 if (GET_CODE (op1) == CONST
1694 && GET_CODE (XEXP (op1, 0)) == PLUS
1695 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1696 != TLS_MODEL_NONE))
1698 opc = XEXP (XEXP (op1, 0), 1);
1699 op1 = XEXP (XEXP (op1, 0), 0);
1701 else
1702 opc = NULL_RTX;
1704 enum tls_model tls_kind;
1706 if (! reload_in_progress && ! reload_completed
1707 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1709 rtx tga_op1, tga_ret, tmp, tmp2;
1711 if (! flag_pic
1712 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1713 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1714 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1716 static int got_labelno;
1717 /* Don't schedule insns for getting GOT address when
1718 the first scheduling is enabled, to avoid spill
1719 failures for R0. */
1720 if (flag_schedule_insns)
1721 emit_insn (gen_blockage ());
1722 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1723 emit_use (gen_rtx_REG (SImode, PIC_REG));
1724 if (flag_schedule_insns)
1725 emit_insn (gen_blockage ());
1728 switch (tls_kind)
1730 case TLS_MODEL_GLOBAL_DYNAMIC:
1731 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1732 if (TARGET_FDPIC)
1733 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1734 sh_get_fdpic_reg_initial_val ());
1735 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1736 tmp = gen_reg_rtx (Pmode);
1737 emit_move_insn (tmp, tga_ret);
1738 op1 = tmp;
1739 break;
1741 case TLS_MODEL_LOCAL_DYNAMIC:
1742 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1743 if (TARGET_FDPIC)
1744 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1745 sh_get_fdpic_reg_initial_val ());
1746 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1748 tmp = gen_reg_rtx (Pmode);
1749 emit_move_insn (tmp, tga_ret);
1751 if (register_operand (op0, Pmode))
1752 tmp2 = op0;
1753 else
1754 tmp2 = gen_reg_rtx (Pmode);
1756 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1757 op1 = tmp2;
1758 break;
1760 case TLS_MODEL_INITIAL_EXEC:
1761 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1762 tmp = gen_sym2GOTTPOFF (op1);
1763 if (TARGET_FDPIC)
1764 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1765 sh_get_fdpic_reg_initial_val ());
1766 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1767 op1 = tga_op1;
1768 break;
1770 case TLS_MODEL_LOCAL_EXEC:
1771 tmp2 = gen_reg_rtx (Pmode);
1772 emit_insn (gen_store_gbr (tmp2));
1773 tmp = gen_reg_rtx (Pmode);
1774 emit_insn (gen_symTPOFF2reg (tmp, op1));
1776 if (register_operand (op0, Pmode))
1777 op1 = op0;
1778 else
1779 op1 = gen_reg_rtx (Pmode);
1781 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1782 break;
1784 default:
1785 gcc_unreachable ();
1787 if (opc)
1788 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1789 operands[1] = op1;
1793 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1795 rtx base, offset;
1796 split_const (operands[1], &base, &offset);
1798 if (GET_CODE (base) == SYMBOL_REF
1799 && !offset_within_block_p (base, INTVAL (offset)))
1801 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1802 emit_move_insn (tmp, base);
1803 if (!arith_operand (offset, mode))
1804 offset = force_reg (mode, offset);
1805 emit_insn (gen_add3_insn (operands[0], tmp, offset));
1810 /* Implement the canonicalize_comparison target hook for the combine
1811 pass. For the target hook this function is invoked via
1812 sh_canonicalize_comparison. This function is also re-used to
1813 canonicalize comparisons in cbranch pattern expanders. */
1814 static void
1815 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1816 machine_mode mode,
1817 bool op0_preserve_value)
1819 /* When invoked from within the combine pass the mode is not specified,
1820 so try to get it from one of the operands. */
1821 if (mode == VOIDmode)
1822 mode = GET_MODE (op0);
1823 if (mode == VOIDmode)
1824 mode = GET_MODE (op1);
1826 // We need to have a mode to do something useful here.
1827 if (mode == VOIDmode)
1828 return;
1830 // Currently, we don't deal with floats here.
1831 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1832 return;
1834 // Make sure that the constant operand is the second operand.
1835 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1837 if (op0_preserve_value)
1838 return;
1840 std::swap (op0, op1);
1841 cmp = swap_condition (cmp);
1844 if (CONST_INT_P (op1))
1846 /* Try to adjust the constant operand in such a way that available
1847 comparison insns can be utilized better and the constant can be
1848 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1849 constant pool. */
1850 const HOST_WIDE_INT val = INTVAL (op1);
1852 /* x > -1 --> x >= 0
1853 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1854 x <= -1 --> x < 0
1855 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1856 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1858 cmp = cmp == GT ? GE : LT;
1859 op1 = gen_int_mode (val + 1, mode);
1862 /* x >= 1 --> x > 0
1863 x >= 0x80 --> x > 0x7F
1864 x < 1 --> x <= 0
1865 x < 0x80 --> x <= 0x7F */
1866 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1868 cmp = cmp == GE ? GT : LE;
1869 op1 = gen_int_mode (val - 1, mode);
1872 /* unsigned x >= 1 --> x != 0
1873 unsigned x < 1 --> x == 0 */
1874 else if (val == 1 && (cmp == GEU || cmp == LTU))
1876 cmp = cmp == GEU ? NE : EQ;
1877 op1 = CONST0_RTX (mode);
1880 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1881 unsigned x < 0x80 --> unsigned x < 0x7F */
1882 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1884 cmp = cmp == GEU ? GTU : LEU;
1885 op1 = gen_int_mode (val - 1, mode);
1888 /* unsigned x > 0 --> x != 0
1889 unsigned x <= 0 --> x == 0 */
1890 else if (val == 0 && (cmp == GTU || cmp == LEU))
1891 cmp = cmp == GTU ? NE : EQ;
1893 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1894 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1895 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1896 && val == 0x7FFFFFFF)
1898 cmp = cmp == GTU ? LT : GE;
1899 op1 = const0_rtx;
1902 /* unsigned x >= 0x80000000 --> signed x < 0
1903 unsigned x < 0x80000000 --> signed x >= 0 */
1904 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1905 && (unsigned HOST_WIDE_INT)val
1906 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1908 cmp = cmp == GEU ? LT : GE;
1909 op1 = const0_rtx;
1914 /* This function implements the canonicalize_comparison target hook.
1915 This wrapper around the internally used sh_canonicalize_comparison
1916 function is needed to do the enum rtx_code <-> int conversion.
1917 Target hooks cannot use enum rtx_code in its definition. */
1918 static void
1919 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1920 bool op0_preserve_value)
1922 enum rtx_code tmp_code = (enum rtx_code)*code;
1923 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1924 VOIDmode, op0_preserve_value);
1925 *code = (int)tmp_code;
1928 /* This function implements the legitimate_combined_insn target hook,
1929 which the combine pass uses to early reject combined insns, before
1930 it tries to recog the insn and determine its cost. */
1931 static bool
1932 sh_legitimate_combined_insn (rtx_insn* insn)
1934 /* Reject combinations of memory loads and zero extensions, as these
1935 interfere with other combine patterns such as zero extracts and bit
1936 tests. The SH2A movu.{b|w} insns are formed later in the
1937 'sh_optimize_extu_exts' pass after combine/split1. */
1938 rtx p = PATTERN (insn);
1939 if (GET_CODE (p) == SET
1940 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1941 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1942 && MEM_P (XEXP (XEXP (p, 1), 0)))
1943 return false;
1945 return true;
1948 bool
1949 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1951 *p1 = T_REG;
1952 *p2 = INVALID_REGNUM;
1953 return true;
1956 /* Try to calculate the branch distance of a conditional branch in bytes.
1958 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1959 walk from this insn into the next (fall-through) basic block and see if
1960 we hit the label. */
1961 unsigned int
1962 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1964 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1966 if (dump_file)
1968 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1969 print_rtl_single (dump_file, cbranch_insn);
1972 unsigned int dist = 0;
1974 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1975 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1977 const unsigned int i_len = get_attr_length (i);
1978 dist += i_len;
1980 if (dump_file)
1981 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1982 INSN_UID (i), i_len, dist);
1984 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1986 if (l == cbranch_insn->jump_target ())
1988 if (dump_file)
1989 fprintf (dump_file, " cbranch dist = %u\n", dist);
1990 return dist;
1992 break;
1996 if (dump_file)
1997 fprintf (dump_file, " cbranch dist = unknown\n");
1999 return unknown_cbranch_distance;
2002 enum rtx_code
2003 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2004 enum rtx_code comparison)
2006 gcc_assert (can_create_pseudo_p ());
2008 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2009 comparison = GET_CODE (operands[0]);
2011 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2012 mode, false);
2014 rtx op1 = operands[1];
2015 operands[1] = force_reg (mode, op1);
2017 /* When we are handling DImode comparisons, we want to keep constants so
2018 that we can optimize the component comparisons; however, memory loads
2019 are better issued as a whole so that they can be scheduled well.
2020 SImode equality comparisons allow I08 constants, but only when they
2021 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2022 into a register, that register might as well be r0, and we allow the
2023 constant. If it is already in a register, this is likely to be
2024 allocated to a different hard register, thus we load the constant into
2025 a register unless it is zero. */
2026 if (!REG_P (operands[2])
2027 && (!CONST_INT_P (operands[2])
2028 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2029 && ((comparison != EQ && comparison != NE)
2030 || (REG_P (op1) && REGNO (op1) != R0_REG)
2031 || !satisfies_constraint_I08 (operands[2])))))
2032 operands[2] = force_reg (mode, operands[2]);
2034 return comparison;
2037 static void
2038 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2039 profile_probability probability)
2041 rtx (*branch_expander) (rtx) = gen_branch_true;
2042 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2043 switch (comparison)
2045 case NE: case LT: case LE: case LTU: case LEU:
2046 comparison = reverse_condition (comparison);
2047 branch_expander = gen_branch_false;
2048 default: ;
2050 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2051 gen_rtx_fmt_ee (comparison, SImode,
2052 operands[1], operands[2])));
2053 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2054 if (probability.initialized_p ())
2055 add_reg_br_prob_note (jump, probability);
2058 void
2059 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2061 expand_cbranchsi4 (operands, comparison,
2062 profile_probability::uninitialized ());
2065 /* ??? How should we distribute probabilities when more than one branch
2066 is generated?  So far we only have some ad-hoc observations:
2067 - If the operands are random, they are likely to differ in both parts.
2068 - If comparing items in a hash chain, the operands are random or equal;
2069 operation should be EQ or NE.
2070 - If items are searched in an ordered tree from the root, we can expect
2071 the highpart to be unequal about half of the time; operation should be
2072 an inequality comparison, operands non-constant, and overall probability
2073 about 50%. Likewise for quicksort.
2074 - Range checks will be often made against constants. Even if we assume for
2075 simplicity an even distribution of the non-constant operand over a
2076 sub-range here, the same probability could be generated with differently
2077 wide sub-ranges - as long as the ratio of the part of the subrange that
2078 is before the threshold to the part that comes after the threshold stays
2079 the same. Thus, we can't really tell anything here;
2080 assuming random distribution is at least simple. */
/* Expand a DImode compare-and-branch into a sequence of up to three SImode
   compare-and-branches emitted through expand_cbranchsi4:
     - msw_taken: compare the high words and branch to the original target
       (operands[3]);
     - msw_skip:  compare the high words and branch to a fresh label that is
       emitted after the low word comparison;
     - lsw_taken: compare the low words and branch to the original target.
   A step whose code is left at LAST_AND_UNUSED_RTX_CODE is not emitted.
   OPERANDS is modified in place (operands[1..3] are overwritten while the
   individual branches are expanded).
   Returns false without emitting anything if COMPARISON is not one of
   EQ, NE, GT, GTU, GE, GEU, LT, LTU, LE, LEU; returns true otherwise.  */
2082 bool
2083 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2085 enum rtx_code msw_taken, msw_skip, lsw_taken;
2086 rtx_code_label *skip_label = NULL;
2087 rtx op1h, op1l, op2h, op2l;
2088 int num_branches;
2089 profile_probability prob, rev_prob;
2090 profile_probability msw_taken_prob = profile_probability::uninitialized (),
2091 msw_skip_prob = profile_probability::uninitialized (),
2092 lsw_taken_prob = profile_probability::uninitialized ();
2094 comparison = prepare_cbranch_operands (operands, DImode, comparison);
/* Split both DImode operands into their SImode high and low parts.  */
2095 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2096 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2097 op1l = gen_lowpart (SImode, operands[1]);
2098 op2l = gen_lowpart (SImode, operands[2]);
/* LAST_AND_UNUSED_RTX_CODE marks a branch step as "not needed".  */
2099 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2100 prob = split_branch_probability;
2101 rev_prob = prob.invert ();
2102 switch (comparison)
2104 case EQ:
/* Skip the low word comparison if the high words differ, otherwise the
   low word comparison decides.  */
2105 msw_skip = NE;
2106 lsw_taken = EQ;
2107 if (prob.initialized_p ())
2109 /* FIXME: This is not optimal. We do not really know the probability
2110 that values differ by MCW only, but we should probably distribute
2111 probabilities more evenly. */
2112 msw_skip_prob = rev_prob;
2113 lsw_taken_prob = prob > profile_probability::never ()
2114 ? profile_probability::guessed_always ()
2115 : profile_probability::guessed_never ();
2117 break;
2118 case NE:
/* Taken if either the high words or the low words differ.  */
2119 msw_taken = NE;
2120 msw_taken_prob = prob;
2121 lsw_taken = NE;
2122 lsw_taken_prob = profile_probability::guessed_never ();
2123 break;
2124 case GTU: case GT:
2125 msw_taken = comparison;
/* With a constant low word of -1 only the high word comparison is
   emitted.  */
2126 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2127 break;
2128 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2129 msw_skip = swap_condition (msw_taken);
2130 lsw_taken = GTU;
2131 break;
2132 case GEU: case GE:
/* With a zero low word only the high word comparison is emitted.  */
2133 if (op2l == CONST0_RTX (SImode))
2134 msw_taken = comparison;
2135 else
2137 msw_taken = comparison == GE ? GT : GTU;
2138 msw_skip = swap_condition (msw_taken);
2139 lsw_taken = GEU;
2141 break;
2142 case LTU: case LT:
2143 msw_taken = comparison;
/* With a zero low word only the high word comparison is emitted.  */
2144 if (op2l == CONST0_RTX (SImode))
2145 break;
2146 msw_skip = swap_condition (msw_taken);
2147 lsw_taken = LTU;
2148 break;
2149 case LEU: case LE:
/* With a constant low word of -1 only the high word comparison is
   emitted.  */
2150 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2151 msw_taken = comparison;
2152 else
2154 lsw_taken = LEU;
2155 if (comparison == LE)
2156 msw_taken = LT;
2157 else if (op2h != CONST0_RTX (SImode))
2158 msw_taken = LTU;
2159 else
/* LEU against a zero high word: no msw_taken branch is emitted, only the
   skip branch and the low word comparison.  */
2161 msw_skip = swap_condition (LTU);
2162 break;
2164 msw_skip = swap_condition (msw_taken);
2166 break;
2167 default: return false;
/* Count how many of the three branch steps will actually be emitted.  */
2169 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2170 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2171 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
/* For inequality comparisons that need more than one branch, distribute
   the overall probability over the individual branches.  */
2172 if (comparison != EQ && comparison != NE && num_branches > 1)
/* Non-constant second operand and an overall probability between 3/8 and
   5/8 of REG_BR_PROB_BASE.  */
2174 if (!CONSTANT_P (operands[2])
2175 && prob.initialized_p ()
2176 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2177 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2179 msw_taken_prob = prob.apply_scale (1, 2);
2180 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
2181 rev_prob.to_reg_br_prob_base ()
2182 + REG_BR_PROB_BASE);
2183 lsw_taken_prob = prob;
2185 else
2187 msw_taken_prob = prob;
2188 msw_skip_prob = profile_probability::guessed_always ();
2189 /* ??? If we have a constant op2h, should we use that when
2190 calculating lsw_taken_prob? */
2191 lsw_taken_prob = prob;
/* Emit the high word branches first.  */
2194 operands[1] = op1h;
2195 operands[2] = op2h;
2197 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2198 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2199 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2201 rtx taken_label = operands[3];
2203 /* Operands were possibly modified, but msw_skip doesn't expect this.
2204 Always use the original ones. */
2205 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2207 operands[1] = op1h;
2208 operands[2] = op2h;
/* Temporarily redirect the branch target to the new skip label, then
   restore the original target for the low word branch.  */
2211 operands[3] = skip_label = gen_label_rtx ();
2212 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2213 operands[3] = taken_label;
/* Finally emit the low word branch, followed by the skip label if one was
   created above.  */
2215 operands[1] = op1l;
2216 operands[2] = op2l;
2217 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2218 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2219 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2220 emit_label (skip_label);
2221 return true;
2224 /* Given an operand, return 1 if the evaluated operand plugged into an
2225 if_then_else will result in a branch_true, 0 if branch_false, or
2226 -1 if neither nor applies. The truth table goes like this:
2228 op | cmpval | code | result
2229 ---------+--------+---------+--------------------
2230 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2231 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2232 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2233 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2234 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2235 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2236 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2237 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2239 sh_eval_treg_value (rtx op)
2241 if (t_reg_operand (op, GET_MODE (op)))
2242 return 1;
2243 if (negt_reg_operand (op, GET_MODE (op)))
2244 return 0;
2246 rtx_code code = GET_CODE (op);
2247 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2248 return -1;
2250 int cmpop = code == EQ ? 1 : 0;
2251 int cmpval = INTVAL (XEXP (op, 1));
2252 if (cmpval != 0 && cmpval != 1)
2253 return -1;
2255 int t;
2256 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2257 t = 0;
2258 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2259 t = 1;
2260 else
2261 return -1;
2263 return t ^ (cmpval == cmpop);
2266 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2267 of floating-point comparisons. */
2268 static void
2269 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2271 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2272 && GET_CODE (insn) != PARALLEL)
2274 insn = gen_rtx_PARALLEL (VOIDmode,
2275 gen_rtvec (3, insn,
2276 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2277 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2279 emit_insn (insn);
2282 /* Prepare the operands for an scc instruction; make sure that the
2283 compare has been done and the result is in T_REG. */
2284 void
2285 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2287 rtx t_reg = get_t_reg_rtx ();
2288 enum rtx_code oldcode = code;
2290 /* First need a compare insn. */
2291 switch (code)
2293 case NE:
2294 /* It isn't possible to handle this case. */
2295 gcc_unreachable ();
2296 case LT:
2297 code = GT;
2298 break;
2299 case LE:
2300 code = GE;
2301 break;
2302 case LTU:
2303 code = GTU;
2304 break;
2305 case LEU:
2306 code = GEU;
2307 break;
2308 default:
2309 break;
2311 if (code != oldcode)
2312 std::swap (op0, op1);
2314 machine_mode mode = GET_MODE (op0);
2315 if (mode == VOIDmode)
2316 mode = GET_MODE (op1);
2318 op0 = force_reg (mode, op0);
2319 if ((code != EQ && code != NE
2320 && (op1 != const0_rtx
2321 || code == GTU || code == GEU || code == LTU || code == LEU))
2322 || (mode == DImode && op1 != const0_rtx)
2323 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2324 op1 = force_reg (mode, op1);
2326 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2327 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2328 mode);
2331 /* Called from the md file, set up the operands of a compare instruction. */
2332 void
2333 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2335 enum rtx_code code = GET_CODE (operands[0]);
2336 enum rtx_code branch_code;
2337 rtx op0 = operands[1];
2338 rtx op1 = operands[2];
2339 rtx insn;
2340 bool need_ccmpeq = false;
2342 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2344 op0 = force_reg (mode, op0);
2345 op1 = force_reg (mode, op1);
2347 else
2349 if (code != EQ || mode == DImode)
2351 /* Force args into regs, since we can't use constants here. */
2352 op0 = force_reg (mode, op0);
2353 if (op1 != const0_rtx || code == GTU || code == GEU)
2354 op1 = force_reg (mode, op1);
2358 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2360 if (code == LT
2361 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2362 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2364 std::swap (op0, op1);
2365 code = swap_condition (code);
2368 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2369 if (code == GE)
2371 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2372 need_ccmpeq = true;
2373 code = GT;
2376 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2377 to EQ/GT respectively. */
2378 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2381 switch (code)
2383 case EQ:
2384 case GT:
2385 case GE:
2386 case GTU:
2387 case GEU:
2388 branch_code = code;
2389 break;
2390 case NE:
2391 case LT:
2392 case LE:
2393 case LTU:
2394 case LEU:
2395 branch_code = reverse_condition (code);
2396 break;
2397 default:
2398 gcc_unreachable ();
2401 insn = gen_rtx_SET (get_t_reg_rtx (),
2402 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2404 sh_emit_set_t_insn (insn, mode);
2405 if (need_ccmpeq)
2406 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2408 if (branch_code == code)
2409 emit_jump_insn (gen_branch_true (operands[3]));
2410 else
2411 emit_jump_insn (gen_branch_false (operands[3]));
2414 void
2415 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2417 enum rtx_code code = GET_CODE (operands[1]);
2418 rtx op0 = operands[2];
2419 rtx op1 = operands[3];
2420 rtx_code_label *lab = NULL;
2421 bool invert = false;
2423 op0 = force_reg (mode, op0);
2424 if ((code != EQ && code != NE
2425 && (op1 != const0_rtx
2426 || code == GTU || code == GEU || code == LTU || code == LEU))
2427 || (mode == DImode && op1 != const0_rtx)
2428 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2429 op1 = force_reg (mode, op1);
2431 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2433 if (code == LT || code == LE)
2435 std::swap (op0, op1);
2436 code = swap_condition (code);
2438 if (code == GE)
2440 if (TARGET_IEEE)
2442 lab = gen_label_rtx ();
2443 sh_emit_scc_to_t (EQ, op0, op1);
2444 emit_jump_insn (gen_branch_true (lab));
2445 code = GT;
2447 else
2449 code = LT;
2450 invert = true;
2455 if (code == NE)
2457 code = EQ;
2458 invert = true;
2461 sh_emit_scc_to_t (code, op0, op1);
2462 if (lab)
2463 emit_label (lab);
2464 if (invert)
2465 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2466 else
2467 emit_move_insn (operands[0], get_t_reg_rtx ());
2470 /* Functions to output assembly code. */
2472 /* Return a sequence of instructions to perform DI or DF move.
2474 Since the SH cannot move a DI or DF in one instruction, we have
2475 to take care when we see overlapping source and dest registers. */
/* operands[0] is the destination and operands[1] the source; MODE is the
   mode used for the register_operand checks.  INSN is unused.  The returned
   string is an output template of two instructions; for a CONST_INT source
   the first instruction is printed directly with output_asm_insn and only
   the second one is returned.  */
2476 const char *
2477 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2478 machine_mode mode)
2480 rtx dst = operands[0];
2481 rtx src = operands[1];
/* Store to a pre-decrement address: the %T1 half is stored first, then
   the %1 half.  */
2483 if (MEM_P (dst)
2484 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2485 return "mov.l %T1,%0" "\n"
2486 " mov.l %1,%0";
/* Register to register move.  */
2488 if (register_operand (dst, mode)
2489 && register_operand (src, mode))
/* Source is the MACH/MACL register pair: read both halves with sts.  */
2491 if (REGNO (src) == MACH_REG)
2492 return "sts mach,%S0" "\n"
2493 " sts macl,%R0";
2495 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2496 when mov.d r1,r0 do r1->r0 then r2->r1. */
2497 if (REGNO (src) + 1 == REGNO (dst))
2498 return "mov %T1,%T0" "\n"
2499 " mov %1,%0";
2500 else
2501 return "mov %1,%0" "\n"
2502 " mov %T1,%T0";
/* Constant source: the %S0 half is loaded with -1 or 0 depending on the
   sign of the constant, the %R0 half with the constant itself.  */
2504 else if (CONST_INT_P (src))
2506 if (INTVAL (src) < 0)
2507 output_asm_insn ("mov #-1,%S0", operands);
2508 else
2509 output_asm_insn ("mov #0,%S0", operands);
2511 return "mov %1,%R0";
/* Load from memory: find the register used in the address (if any) so
   that an overlap with the destination register can be detected.  */
2513 else if (MEM_P (src))
2515 int ptrreg = -1;
2516 int dreg = REGNO (dst);
2517 rtx inside = XEXP (src, 0);
2519 switch (GET_CODE (inside))
2521 case REG:
2522 ptrreg = REGNO (inside);
2523 break;
2525 case SUBREG:
2526 ptrreg = subreg_regno (inside);
2527 break;
2529 case PLUS:
2530 ptrreg = REGNO (XEXP (inside, 0));
2531 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2532 an offsettable address. Unfortunately, offsettable addresses use
2533 QImode to check the offset, and a QImode offsettable address
2534 requires r0 for the other operand, which is not currently
2535 supported, so we can't use the 'o' constraint.
2536 Thus we must check for and handle r0+REG addresses here.
2537 We punt for now, since this is likely very rare. */
2538 gcc_assert (!REG_P (XEXP (inside, 1)));
2539 break;
2541 case LABEL_REF:
2542 return "mov.l %1,%0" "\n"
2543 " mov.l %1+4,%T0";
/* Post-increment address: the same address operand is used for both
   loads.  */
2544 case POST_INC:
2545 return "mov.l %1,%0" "\n"
2546 " mov.l %1,%T0";
2547 default:
2548 gcc_unreachable ();
2551 /* Work out the safe way to copy. Copy into the second half first. */
/* If the first destination register is also the address register, it must
   be written last so that the address stays valid for the second load.  */
2552 if (dreg == ptrreg)
2553 return "mov.l %T1,%T0" "\n"
2554 " mov.l %1,%0";
/* Default order for all remaining cases.  */
2557 return "mov.l %1,%0" "\n"
2558 " mov.l %T1,%T0";
2561 /* Print an instruction which would have gone into a delay slot after
2562 another instruction, but couldn't because the other instruction expanded
2563 into a sequence where putting the slot insn at the end wouldn't work. */
2564 static void
2565 print_slot (rtx_sequence *seq)
2567 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2569 seq->insn (1)->set_deleted ();
/* Output the assembly for an unconditional jump INSN to destination OP
   that is emitted as a load of the destination (or of an offset to it) into
   a register followed by an indirect jump (jmp / braf).  The address or
   offset is placed in a literal directly after the jump sequence.
   Everything is printed with output_asm_insn; the returned template is
   always the empty string.  */
2572 const char *
2573 output_far_jump (rtx_insn *insn, rtx op)
/* The three members are passed to output_asm_insn as an operand array via
   &this_jmp.lab: lab is operand 0, reg operand 1 and op operand 2.  */
2575 struct { rtx lab, reg, op; } this_jmp;
2576 rtx_code_label *braf_base_lab = NULL;
2577 const char *jump;
2578 int far;
2579 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2580 rtx_insn *prev;
2582 this_jmp.lab = gen_label_rtx ();
/* On SH2 and later, a destination within the checked offset range that
   does not cross sections is reached with a 16 bit offset and braf.  */
2584 if (TARGET_SH2
2585 && offset >= -32764
2586 && offset - get_attr_length (insn) <= 32766
2587 && ! CROSSING_JUMP_P (insn))
2589 far = 0;
2590 jump = "mov.w %O0,%1" "\n"
2591 " braf %1";
2593 else
/* Otherwise a 32 bit literal is needed.  */
2595 far = 1;
2596 if (flag_pic)
2598 if (TARGET_SH2)
2599 jump = "mov.l %O0,%1" "\n"
2600 " braf %1";
2601 else
/* PIC without braf: compute the destination from the literal's address
   obtained with mova, saving and restoring r0 around it.  */
2602 jump = "mov.l r0,@-r15" "\n"
2603 " mova %O0,r0" "\n"
2604 " mov.l @r0,%1" "\n"
2605 " add r0,%1" "\n"
2606 " mov.l @r15+,r0" "\n"
2607 " jmp @%1";
2609 else
2610 jump = "mov.l %O0,%1" "\n"
2611 " jmp @%1";
2613 /* If we have a scratch register available, use it. */
2614 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2615 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2617 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
/* The scratch register is r0 itself, so the mova sequence above cannot
   save/restore r0; use r1 as the temporary instead.  */
2618 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2619 jump = "mov.l r1,@-r15" "\n"
2620 " mova %O0,r0" "\n"
2621 " mov.l @r0,r1" "\n"
2622 " add r1,r0" "\n"
2623 " mov.l @r15+,r1" "\n"
2624 " jmp @%1";
2625 output_asm_insn (jump, &this_jmp.lab);
/* Fill the delay slot of the jump with the pending slot insn, or a nop.  */
2626 if (dbr_sequence_length ())
2627 print_slot (final_sequence);
2628 else
2629 output_asm_insn ("nop", 0);
2631 else
2633 /* Output the delay slot insn first if any. */
2634 if (dbr_sequence_length ())
2635 print_slot (final_sequence);
/* No scratch register: use r13, saving it on the stack before the jump
   sequence and restoring it in the instruction following the jump.  */
2637 this_jmp.reg = gen_rtx_REG (SImode, 13);
2638 output_asm_insn ("mov.l r13,@-r15", 0);
2639 output_asm_insn (jump, &this_jmp.lab);
2640 output_asm_insn ("mov.l @r15+,r13", 0);
/* For the PIC braf variant, emit a label here that the literal below is
   made relative to.  */
2642 if (far && flag_pic && TARGET_SH2)
2644 braf_base_lab = gen_label_rtx ();
2645 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2646 CODE_LABEL_NUMBER (braf_base_lab));
/* Align the 32 bit literal and emit the label that the load refers to.  */
2648 if (far)
2649 output_asm_insn (".align 2", 0);
2650 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2651 this_jmp.op = op;
/* Emit the literal itself: a label-relative 32 bit offset for PIC, the
   absolute 32 bit address for non-PIC far jumps, or a 16 bit offset for
   the short braf form.  */
2652 if (far && flag_pic)
2654 if (TARGET_SH2)
2655 this_jmp.lab = braf_base_lab;
2656 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2658 else
2659 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2660 return "";
2663 /* Local label counter, used for constants in the pool and inside
2664 pattern branches. */
/* Starts at 100.  output_branch post-increments it each time it needs a
   fresh number for an "LF" internal label.  */
2665 static int lf = 100;
2667 /* Output code for ordinary branches. */
/* LOGIC selects the branch sense: nonzero outputs a "bt" form, zero a "bf"
   form.  INSN is the branch insn and operands[0] its target label.  The
   code to output is chosen by the length attribute of INSN (2, 4, 6 or 8
   bytes); any other length aborts.  Returns the output template for the
   (remaining) instruction, or "" if everything was already printed.  */
2668 const char *
2669 output_branch (int logic, rtx_insn *insn, rtx *operands)
2671 switch (get_attr_length (insn))
2673 case 6:
2674 /* This can happen if filling the delay slot has caused a forward
2675 branch to exceed its range (we could reverse it, but only
2676 when we know we won't overextend other branches; this should
2677 best be handled by relaxation).
2678 It can also happen when other condbranches hoist delay slot insn
2679 from their destination, thus leading to code size increase.
2680 But the branch will still be in the range -4092..+4098 bytes. */
2681 if (! TARGET_RELAX)
/* Output an inverted short branch around an unconditional "bra" to the
   real target; LF<label> marks the fall-through point.  */
2683 int label = lf++;
2684 /* The call to print_slot will clobber the operands. */
2685 rtx op0 = operands[0];
2687 /* If the instruction in the delay slot is annulled (true), then
2688 there is no delay slot where we can put it now. The only safe
2689 place for it is after the label. final will do that by default. */
2691 if (final_sequence
2692 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2693 && get_attr_length (final_sequence->insn (1)))
/* Inverted branch with delay slot ("/s" or ".s" depending on the
   assembler dialect), followed by the original slot insn.  */
2695 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2696 ASSEMBLER_DIALECT ? "/" : ".", label);
2697 print_slot (final_sequence);
2699 else
2700 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2702 output_asm_insn ("bra\t%l0", &op0);
2703 fprintf (asm_out_file, "\tnop\n");
2704 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2706 return "";
2708 /* FALLTHRU */
2709 /* When relaxing, handle this like a short branch. The linker
2710 will fix it up if it still doesn't fit after relaxation. */
2711 case 2:
2712 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2714 /* These are for SH2e, in which we have to account for the
2715 extra nop because of the hardware bug in annulled branches. */
2716 case 8:
2717 if (! TARGET_RELAX)
/* Same inverted-branch-around-bra scheme as for length 6, but with an
   explicit nop in the delay slot of the inverted branch.  */
2719 int label = lf++;
2721 gcc_assert (!final_sequence
2722 || !(INSN_ANNULLED_BRANCH_P
2723 (XVECEXP (final_sequence, 0, 0))));
2724 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2725 logic ? "f" : "t",
2726 ASSEMBLER_DIALECT ? "/" : ".", label);
2727 fprintf (asm_out_file, "\tnop\n");
2728 output_asm_insn ("bra\t%l0", operands);
2729 fprintf (asm_out_file, "\tnop\n");
2730 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2732 return "";
2734 /* FALLTHRU */
2735 case 4:
/* Delayed branch in the requested sense, printed directly, with the
   returned "nop" filling its delay slot.  The formatted string is at most
   8 characters plus the terminating NUL, so it fits into BUFFER.  */
2737 char buffer[10];
2739 sprintf (buffer, "b%s%ss\t%%l0",
2740 logic ? "t" : "f",
2741 ASSEMBLER_DIALECT ? "/" : ".");
2742 output_asm_insn (buffer, &operands[0]);
2743 return "nop";
2746 default:
2747 /* There should be no longer branches now - that would
2748 indicate that something has destroyed the branches set
2749 up in machine_dependent_reorg. */
2750 gcc_unreachable ();
2754 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2755 fill in operands 9 as a label to the successor insn.
2756 We try to use jump threading where possible.
2757 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2758 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2759 follow jmp and bt, if the address is in range. */
2760 const char *
2761 output_branchy_insn (enum rtx_code code, const char *templ,
2762 rtx_insn *insn, rtx *operands)
2764 rtx_insn *next_insn = NEXT_INSN (insn);
2766 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2768 rtx src = SET_SRC (PATTERN (next_insn));
2769 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2771 /* Following branch not taken */
2772 rtx_code_label *lab = gen_label_rtx ();
2773 emit_label_after (lab, next_insn);
2774 INSN_ADDRESSES_NEW (lab,
2775 INSN_ADDRESSES (INSN_UID (next_insn))
2776 + get_attr_length (next_insn));
2777 operands[9] = lab;
2778 return templ;
2780 else
2782 int offset = (branch_dest (next_insn)
2783 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2784 if (offset >= -252 && offset <= 258)
2786 if (GET_CODE (src) == IF_THEN_ELSE)
2787 /* branch_true */
2788 src = XEXP (src, 1);
2789 operands[9] = src;
2790 return templ;
2794 rtx_code_label *lab = gen_label_rtx ();
2795 emit_label_after (lab, insn);
2796 INSN_ADDRESSES_NEW (lab,
2797 INSN_ADDRESSES (INSN_UID (insn))
2798 + get_attr_length (insn));
2799 operands[9] = lab;
2800 return templ;
/* Output the second half of an IEEE conforming floating-point "greater or
   equal" comparison for INSN: a "bt" to the label that output_branchy_insn
   stores in operands[9], followed by an fcmp/eq of operands[1] and
   operands[0].  NE is passed as the code that output_branchy_insn compares
   against a following conditional jump.  */
2803 const char *
2804 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2806 return output_branchy_insn (NE, "bt %l9" "\n"
2807 " fcmp/eq %1,%0",
2808 insn, operands);
2811 /* Output the start of the assembler file. */
2812 static void
2813 sh_file_start (void)
2815 default_file_start ();
2817 if (TARGET_ELF)
2818 /* We need to show the text section with the proper
2819 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2820 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2821 will complain. We can teach GAS specifically about the
2822 default attributes for our choice of text section, but
2823 then we would have to change GAS again if/when we change
2824 the text section name. */
2825 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2826 else
2827 /* Switch to the data section so that the coffsem symbol
2828 isn't in the text section. */
2829 switch_to_section (data_section);
2831 if (TARGET_LITTLE_ENDIAN)
2832 fputs ("\t.little\n", asm_out_file);
2835 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2836 need to be output as pointers to function descriptors for
2837 FDPIC. */
2839 static bool
2840 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2842 if (TARGET_FDPIC && size == UNITS_PER_WORD
2843 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2845 fputs ("\t.long\t", asm_out_file);
2846 output_addr_const (asm_out_file, value);
2847 fputs ("@FUNCDESC\n", asm_out_file);
2848 return true;
2850 return default_assemble_integer (value, size, aligned_p);
2853 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2854 static bool
2855 unspec_caller_rtx_p (rtx pat)
2857 rtx base, offset;
2858 split_const (pat, &base, &offset);
2860 if (GET_CODE (base) == UNSPEC)
2862 if (XINT (base, 1) == UNSPEC_CALLER)
2863 return true;
2864 for (int i = 0; i < XVECLEN (base, 0); i++)
2865 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2866 return true;
2868 return false;
2871 /* Indicate that INSN cannot be duplicated. This is true for insn
2872 that generates a unique label. */
2873 static bool
2874 sh_cannot_copy_insn_p (rtx_insn *insn)
2876 if (!reload_completed || !flag_pic)
2877 return false;
2879 if (!NONJUMP_INSN_P (insn))
2880 return false;
2881 if (asm_noperands (insn) >= 0)
2882 return false;
2884 rtx pat = PATTERN (insn);
2886 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2887 return false;
2889 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2891 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2892 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2893 return true;
2896 if (GET_CODE (pat) != SET)
2897 return false;
2898 pat = SET_SRC (pat);
2900 if (unspec_caller_rtx_p (pat))
2901 return true;
2903 return false;
2906 /* Number of instructions used to make an arithmetic right shift by N. */
/* The table has 32 entries, one for each N in 0..31.  */
2907 static const char ashiftrt_insns[] =
2908 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2910 /* Description of a logical left or right shift, when expanded to a sequence
2911 of 1/2/8/16 shifts.
2912 Notice that one bit right shifts clobber the T bit. One bit left shifts
2913 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
/* Bit flags stored in ashl_lshr_sequence::clobbers_t.  They tell whether
   the described sequence clobbers the T bit when used as a left shift
   (ASHL_CLOBBERS_T) or as a logical right shift (LSHR_CLOBBERS_T).  */
2914 enum
2916 ASHL_CLOBBERS_T = 1 << 0,
2917 LSHR_CLOBBERS_T = 1 << 1
/* One entry of the shift sequence tables below.  */
2920 struct ashl_lshr_sequence
/* Number of shift insns in the sequence, i.e. the number of used elements
   of AMOUNT.  */
2922 char insn_count;
/* The individual shift amounts of the sequence.  NOTE(review): a negative
   amount presumably denotes a shift in the opposite direction (the amounts
   of each table entry add up to the total shift count) -- confirm against
   the users of the tables.  */
2923 signed char amount[6];
/* Combination of ASHL_CLOBBERS_T and LSHR_CLOBBERS_T.  */
2924 char clobbers_t;
/* Shift sequences for logical left / right shifts, indexed by the shift
   count 0..31.  Each entry gives the number of insns, the individual
   1/2/8/16 shift amounts and the T bit clobber flags.  */
2927 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2929 { 0, { 0 }, 0 }, // 0
2930 { 1, { 1 }, LSHR_CLOBBERS_T },
2931 { 1, { 2 }, 0 },
2932 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2933 { 2, { 2, 2 }, 0 }, // 4
2934 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2935 { 3, { 2, 2, 2 }, 0 },
2936 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2937 { 1, { 8 }, 0 }, // 8
2938 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2939 { 2, { 8, 2 }, 0 },
2940 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2941 { 3, { 8, 2, 2 }, 0 }, // 12
2942 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2943 { 3, { 8, -2, 8 }, 0 },
2944 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2945 { 1, { 16 }, 0 }, // 16
2946 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2947 { 2, { 16, 2 }, 0 },
2948 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2949 { 3, { 16, 2, 2 }, 0 }, // 20
2950 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2951 { 3, { 16, -2, 8 }, 0 },
2952 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2953 { 2, { 16, 8 }, 0 }, // 24
2954 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2955 { 3, { 16, 8, 2 }, 0 },
2956 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2957 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2958 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2959 { 3, { 16, -2, 16 }, 0 },
2961 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2962 For a left shift by 31 a 2 insn and-rotl sequences can be used.
2963 However, the shift-and combiner code needs this entry here to be in
2964 terms of real shift insns. */
2965 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2968 /* Individual shift amounts for shift amounts < 16, up to three highmost
2969 bits might be clobbered. This is typically used when combined with some
2970 kind of sign or zero extension. */
/* Indexed by the shift count 0..31 like ashl_lshr_seq.  The entries for
   the counts 6, 7, 13, 14 and 15 differ from ashl_lshr_seq; all other
   entries are identical.  */
2971 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2973 { 0, { 0 }, 0 }, // 0
2974 { 1, { 1 }, LSHR_CLOBBERS_T },
2975 { 1, { 2 }, 0 },
2976 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2977 { 2, { 2, 2 }, 0 }, // 4
2978 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2979 { 2, { 8, -2 }, 0 },
2980 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2981 { 1, { 8 }, 0 }, // 8
2982 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2983 { 2, { 8, 2 }, 0 },
2984 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2985 { 3, { 8, 2, 2 }, 0 }, // 12
2986 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2987 { 2, { 16, -2 }, 0 },
2988 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2989 { 1, { 16 }, 0 }, // 16
2990 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2991 { 2, { 16, 2 }, 0 },
2992 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2993 { 3, { 16, 2, 2 }, 0 }, // 20
2994 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2995 { 3, { 16, -2, 8 }, 0 },
2996 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2997 { 2, { 16, 8 }, 0 }, // 24
2998 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2999 { 3, { 16, 8, 2 }, 0 },
3000 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3001 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3002 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3003 { 3, { 16, -2, 16 }, 0 },
3004 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3007 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3008 will clobber the T bit. */
3009 bool
3010 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3012 gcc_assert (CONST_INT_P (shift_amount));
3014 const int shift_amount_i = INTVAL (shift_amount) & 31;
3016 /* Special case for shift count of 31: use and-rotl sequence. */
3017 if (shift_amount_i == 31)
3018 return true;
3020 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3021 & ASHL_CLOBBERS_T) != 0;
3024 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3025 instructions will clobber the T bit. */
3026 bool
3027 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3029 gcc_assert (CONST_INT_P (shift_amount));
3031 /* For right shifts the constant might be negative. */
3032 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3034 /* Special case for shift count of 31: use shll-movt sequence. */
3035 if (shift_amount_i == 31)
3036 return true;
3038 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3039 & LSHR_CLOBBERS_T) != 0;
3042 /* Return true if it is potentially beneficial to use a dynamic shift
3043 instruction (shad / shar) instead of a combination of 1/2/8/16
3044 shift instructions for the specified shift count.
3045 If dynamic shifts are not available, always return false. */
3046 bool
3047 sh_dynamicalize_shift_p (rtx count)
3049 gcc_assert (CONST_INT_P (count));
3051 /* For right shifts the constant might be negative. */
3052 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3053 int insn_count;
3055 /* For left and right shifts, there are shorter 2 insn sequences for
3056 shift amounts of 31. */
3057 if (shift_amount_i == 31)
3058 insn_count = 2;
3059 else
3060 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3062 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
/* Assuming we have a value that has been sign-extended by at least one bit,
   can we use the extended shift sequences with the last shift turned into
   an arithmetic shift to shift it by N without data loss, and quicker than
   by other means?  This holds exactly for N == 7 and N == 15.  */
#define EXT_SHIFT_SIGNED(n) (((n) & ~8) == 7)
3071 /* Return the cost of a shift. */
3072 static inline int
3073 shiftcosts (rtx x)
3075 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3077 if (GET_MODE (x) == DImode
3078 && CONST_INT_P (XEXP (x, 1))
3079 && INTVAL (XEXP (x, 1)) == 1)
3080 return 2;
3082 /* Everything else is invalid, because there is no pattern for it. */
3083 return -1;
3085 /* If shift by a non constant, then this will be expensive. */
3086 if (!CONST_INT_P (XEXP (x, 1)))
3087 return SH_DYNAMIC_SHIFT_COST;
3089 /* Otherwise, return the true cost in instructions. Cope with out of range
3090 shift counts more or less arbitrarily. */
3091 int value = INTVAL (XEXP (x, 1)) & 31;
3093 if (GET_CODE (x) == ASHIFTRT)
3095 int cost = ashiftrt_insns[value];
3096 /* If dynamic shifts are available and profitable in this case, then we
3097 put the constant in a reg and use shad. */
3098 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3099 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3100 return cost;
3102 else
3103 return ashl_lshr_seq[value].insn_count;
3106 /* Return the cost of an AND/XOR/IOR operation. */
3107 static inline int
3108 and_xor_ior_costs (rtx x, int code)
3110 /* On SH1-4 we have only max. SImode operations.
3111 Double the cost for modes > SImode. */
3112 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3114 /* A logical operation with two registers is a single cycle
3115 instruction. */
3116 if (!CONST_INT_P (XEXP (x, 1)))
3117 return 1 * cost_scale;
3119 int i = INTVAL (XEXP (x, 1));
3121 /* These constants are single cycle extu.[bw] instructions. */
3122 if ((i == 0xff || i == 0xffff) && code == AND)
3123 return 1 * cost_scale;
3124 /* Constants that can be used in an instruction as an immediate are
3125 a single cycle, but this requires r0, so make it a little more
3126 expensive. */
3127 if (CONST_OK_FOR_K08 (i))
3128 return 2 * cost_scale;
3129 /* Constants that can be loaded with a mov immediate need one more cycle.
3130 This case is probably unnecessary. */
3131 if (CONST_OK_FOR_I08 (i))
3132 return 2 * cost_scale;
3133 /* Any other constant requires an additional 2 cycle pc-relative load.
3134 This case is probably unnecessary. */
3135 return 3 * cost_scale;
3138 /* Return the cost of an addition or a subtraction. */
3139 static inline int
3140 addsubcosts (rtx x)
3142 if (GET_MODE (x) == SImode)
3144 /* The addc or subc patterns will eventually become one or two
3145 instructions. Below are some costs for some of the patterns
3146 which combine would reject because the costs of the individual
3147 insns in the patterns are lower.
3149 FIXME: It would be much easier if we had something like insn cost
3150 attributes and the cost calculation machinery used those attributes
3151 in the first place. This would eliminate redundant recog-like C
3152 code to calculate costs of complex patterns. */
3153 rtx op0 = XEXP (x, 0);
3154 rtx op1 = XEXP (x, 1);
3156 if (GET_CODE (x) == PLUS)
3158 if (GET_CODE (op0) == AND
3159 && XEXP (op0, 1) == const1_rtx
3160 && (GET_CODE (op1) == PLUS
3161 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3162 return 1;
3164 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3165 && GET_CODE (op1) == LSHIFTRT
3166 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3167 return 1;
3169 /* Let's assume that adding the result of an insns that stores into
3170 the T bit is cheap. */
3171 if (treg_set_expr (op1, SImode))
3172 return 1;
3173 if (treg_set_expr (op0, SImode))
3174 return 1;
3177 /* On SH1-4 we have only max. SImode operations.
3178 Double the cost for modes > SImode. */
3179 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3181 /* Adding a register is a single cycle insn. */
3182 if (REG_P (XEXP (x, 1))
3183 || GET_CODE (XEXP (x, 1)) == SUBREG)
3184 return 1 * cost_scale;
3186 /* Likewise for small constants. */
3187 if (CONST_INT_P (XEXP (x, 1))
3188 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3189 return 1 * cost_scale;
3191 /* Any other constant requires a 2 cycle pc-relative load plus an
3192 addition. */
3193 return 3 * cost_scale;
3196 /* Return the cost of a multiply. */
3197 static inline int
3198 multcosts (rtx x ATTRIBUTE_UNUSED)
3200 if (sh_multcost >= 0)
3201 return sh_multcost;
3203 if (TARGET_SH2)
3205 /* We have a mul insn, so we can never take more than the mul and the
3206 read of the mac reg, but count more because of the latency and extra
3207 reg usage. */
3208 if (optimize_size)
3209 return 2;
3210 return 3;
3213 /* If we're aiming at small code, then just count the number of
3214 insns in a multiply call sequence. */
3215 if (optimize_size)
3216 return 5;
3218 /* Otherwise count all the insns in the routine we'd be calling too. */
3219 return 20;
3222 /* Compute a (partial) cost for rtx X. Return true if the complete
3223 cost has been computed, and false if subexpressions should be
3224 scanned. In either case, *TOTAL contains the cost result. */
3225 static bool
3226 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3227 int opno ATTRIBUTE_UNUSED,
3228 int *total, bool speed ATTRIBUTE_UNUSED)
3230 int code = GET_CODE (x);
3232 switch (code)
3234 /* The lower-subreg pass decides whether to split multi-word regs
3235 into individual regs by looking at the cost for a SET of certain
3236 modes with the following patterns:
3237 (set (reg) (reg))
3238 (set (reg) (const_int 0))
3239 On machines that support vector-move operations a multi-word move
3240 is the same cost as individual reg move. On SH there is no
3241 vector-move, so we have to provide the correct cost in the number
3242 of move insns to load/store the reg of the mode in question. */
3243 case SET:
3244 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3246 *total = COSTS_N_INSNS (1);
3247 return true;
3250 if (register_operand (SET_DEST (x), VOIDmode)
3251 && (register_operand (SET_SRC (x), VOIDmode)
3252 || satisfies_constraint_Z (SET_SRC (x))))
3254 const machine_mode mode = GET_MODE (SET_DEST (x));
3255 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3256 / mov_insn_size (mode, TARGET_SH2A));
3257 return true;
3259 return false;
3261 /* The cost of a mem access is mainly the cost of the address mode. */
3262 case MEM:
3263 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3264 true);
3265 return true;
3267 case IF_THEN_ELSE:
3268 /* This case is required for the if_then_else negc pattern. */
3269 if (treg_set_expr (XEXP (x, 0), SImode))
3271 *total = COSTS_N_INSNS (1);
3272 return true;
3274 else
3275 return false;
3277 /* Zero extracts of single bits are usually combine patterns for the
3278 tst insns. */
3279 case ZERO_EXTRACT:
3280 if (GET_CODE (XEXP (x, 0)) == XOR
3281 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3282 && XEXP (x, 1) == const1_rtx
3283 && CONST_INT_P (XEXP (x, 2))
3284 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3285 /* Check that the xor constaint overlaps with the extracted bit. */
3286 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3288 *total = 1; //COSTS_N_INSNS (1);
3289 return true;
3292 /* div0s variant. */
3293 if (GET_CODE (XEXP (x, 0)) == XOR
3294 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3295 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3297 *total = 1;
3298 return true;
3300 return false;
3302 /* The cost of a sign or zero extend depends on whether the source is a
3303 reg or a mem. In case of a mem take the address into account. */
3304 case SIGN_EXTEND:
3305 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3307 *total = COSTS_N_INSNS (1);
3308 return true;
3310 if (MEM_P (XEXP (x, 0)))
3312 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3313 GET_MODE (XEXP (x, 0)),
3314 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3315 return true;
3317 return false;
3319 case ZERO_EXTEND:
3320 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3322 *total = COSTS_N_INSNS (1);
3323 return true;
3325 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3326 && (GET_MODE (XEXP (x, 0)) == QImode
3327 || GET_MODE (XEXP (x, 0)) == HImode))
3329 /* Handle SH2A's movu.b and movu.w insn. */
3330 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3331 GET_MODE (XEXP (x, 0)),
3332 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3333 return true;
3335 return false;
3337 /* mems for SFmode and DFmode can be inside a parallel due to
3338 the way the fpscr is handled. */
3339 case PARALLEL:
3340 for (int i = 0; i < XVECLEN (x, 0); i++)
3342 rtx xx = XVECEXP (x, 0, i);
3343 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3345 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3346 GET_MODE (XEXP (xx, 0)),
3347 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3348 return true;
3350 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3352 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3353 GET_MODE (XEXP (xx, 1)),
3354 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3355 return true;
3359 if (sh_1el_vec (x, VOIDmode))
3360 *total = outer_code != SET;
3361 else if (sh_rep_vec (x, VOIDmode))
3362 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3363 + (outer_code != SET));
3364 else
3365 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3366 return true;
3368 case CONST_INT:
3369 if (CONST_OK_FOR_I08 (INTVAL (x)))
3370 *total = 0;
3371 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3372 && CONST_OK_FOR_K08 (INTVAL (x)))
3373 *total = 1;
3374 /* prepare_cmp_insn will force costly constants int registers before
3375 the cbranch[sd]i4 patterns can see them, so preserve potentially
3376 interesting ones not covered by I08 above. */
3377 else if (outer_code == COMPARE
3378 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3379 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3380 || INTVAL (x) == 0x7fffffff
3381 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3382 *total = 1;
3383 else
3384 *total = 8;
3385 return true;
3387 case EQ:
3388 /* An and with a constant compared against zero is
3389 most likely going to be a TST #imm, R0 instruction. */
3390 if (XEXP (x, 1) == const0_rtx
3391 && ((GET_CODE (XEXP (x, 0)) == AND
3392 || (SUBREG_P (XEXP (x, 0))
3393 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3394 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3396 *total = 1;
3397 return true;
3400 else if (XEXP (x, 1) == const0_rtx
3401 && GET_CODE (XEXP (x, 0)) == AND
3402 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3403 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3404 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3405 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3407 *total = 1;
3408 return true;
3410 else
3411 return false;
3413 case SMIN:
3414 case SMAX:
3415 /* This is most likely a clips.b or clips.w insn that is being made up
3416 by combine. */
3417 if (TARGET_SH2A
3418 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3419 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3420 && REG_P (XEXP (XEXP (x, 0), 0))
3421 && CONST_INT_P (XEXP (x, 1)))
3423 *total = COSTS_N_INSNS (1);
3424 return true;
3426 else
3427 return false;
3429 case CONST:
3430 case LABEL_REF:
3431 case SYMBOL_REF:
3432 *total = 5;
3433 return true;
3435 case CONST_DOUBLE:
3436 /* prepare_cmp_insn will force costly constants int registers before
3437 the cbranchdi4 pattern can see them, so preserve potentially
3438 interesting ones. */
3439 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3440 *total = 1;
3441 else
3442 *total = 10;
3443 return true;
3445 case CONST_VECTOR:
3446 /* FIXME: This looks broken. Only the last statement has any effect.
3447 Probably this could be folded with the PARALLEL case? */
3448 if (x == CONST0_RTX (GET_MODE (x)))
3449 *total = 0;
3450 else if (sh_1el_vec (x, VOIDmode))
3451 *total = outer_code != SET;
3452 if (sh_rep_vec (x, VOIDmode))
3453 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3454 + (outer_code != SET));
3455 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3456 return true;
3458 case PLUS:
3459 case MINUS:
3460 *total = COSTS_N_INSNS (addsubcosts (x));
3461 return true;
3463 case AND:
3464 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3465 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3467 *total = COSTS_N_INSNS (1);
3468 return true;
3470 /* Fall through. */
3472 case XOR:
3473 case IOR:
3474 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3475 return true;
3477 case MULT:
3478 *total = COSTS_N_INSNS (multcosts (x));
3479 return true;
3481 case LT:
3482 case GE:
3483 /* div0s sign comparison. */
3484 if (GET_CODE (XEXP (x, 0)) == XOR
3485 && REG_P ((XEXP (XEXP (x, 0), 0)))
3486 && REG_P ((XEXP (XEXP (x, 0), 1)))
3487 && satisfies_constraint_Z (XEXP (x, 1)))
3489 *total = COSTS_N_INSNS (1);
3490 return true;
3492 else
3493 return false;
3495 case LSHIFTRT:
3496 /* div0s sign comparison. */
3497 if (GET_CODE (XEXP (x, 0)) == XOR
3498 && REG_P ((XEXP (XEXP (x, 0), 0)))
3499 && REG_P ((XEXP (XEXP (x, 0), 1)))
3500 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3502 *total = COSTS_N_INSNS (1);
3503 return true;
3505 /* FALLTHRU */
3506 case ASHIFT:
3507 case ASHIFTRT:
3509 int cost = shiftcosts (x);
3510 if (cost < 0)
3511 return false;
3512 *total = COSTS_N_INSNS (cost);
3513 return true;
3516 case DIV:
3517 case UDIV:
3518 case MOD:
3519 case UMOD:
3520 *total = COSTS_N_INSNS (20);
3521 return true;
3523 case FLOAT:
3524 case FIX:
3525 *total = 100;
3526 return true;
3528 default:
3529 return false;
3533 /* Determine the size of the fundamental move insn that will be used
3534 for the specified mode. */
3535 static inline int
3536 mov_insn_size (machine_mode mode, bool consider_sh2a)
3538 const int mode_sz = GET_MODE_SIZE (mode);
3540 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3541 || (TARGET_FMOVD && mode == DFmode))
3542 return mode_sz;
3543 else
3545 /* The max. available mode for actual move insns is SImode.
3546 Larger accesses will be split into multiple loads/stores. */
3547 const int max_mov_sz = GET_MODE_SIZE (SImode);
3548 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3552 /* Determine the maximum possible displacement for a move insn for the
3553 specified mode. */
3555 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3557 /* The 4 byte displacement move insns are the same as the 2 byte
3558 versions but take a 12 bit displacement. All we need to do is to
3559 scale the max. displacement value accordingly. */
3560 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3562 /* SH2A supports FPU move insns with 12 bit displacements.
3563 Other variants to do not support any kind of displacements for
3564 FPU move insns. */
3565 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3566 return 0;
3567 else
3569 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3570 const int mode_sz = GET_MODE_SIZE (mode);
3571 int r = 15 * mov_insn_sz * disp_scale;
3573 /* If the mov insn will be split into multiple loads/stores, the
3574 maximum possible displacement is a bit smaller. */
3575 if (mode_sz > mov_insn_sz)
3576 r -= mode_sz - mov_insn_sz;
3577 return r;
3581 /* Determine the alignment mask for a move insn of the
3582 specified mode. */
3583 static inline int
3584 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3586 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3587 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3590 /* Return the displacement value of a displacement address. */
3591 HOST_WIDE_INT
3592 sh_disp_addr_displacement (rtx x)
3594 gcc_assert (satisfies_constraint_Sdd (x));
3595 return INTVAL (XEXP (XEXP (x, 0), 1));
3598 /* Compute the cost of an address. */
3599 static int
3600 sh_address_cost (rtx x, machine_mode mode,
3601 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3603 /* 'GBR + 0'. Account one more because of R0 restriction. */
3604 if (REG_P (x) && REGNO (x) == GBR_REG)
3605 return 2;
3607 /* Simple reg, post-inc, pre-dec addressing. */
3608 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3609 return 1;
3611 /* 'reg + disp' addressing. */
3612 if (GET_CODE (x) == PLUS
3613 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3615 /* 'GBR + disp'. Account one more because of R0 restriction. */
3616 if (REGNO (XEXP (x, 0)) == GBR_REG
3617 && gbr_displacement (XEXP (x, 1), mode))
3618 return 2;
3620 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3622 if (offset == 0)
3623 return 1;
3625 /* The displacement would fit into a 2 byte move insn.
3626 HImode and QImode loads/stores with displacement put pressure on
3627 R0 which will most likely require another reg copy. Thus account
3628 a higher cost for that. */
3629 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3630 return (mode == HImode || mode == QImode) ? 2 : 1;
3632 /* The displacement would fit into a 4 byte move insn (SH2A). */
3633 if (TARGET_SH2A
3634 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3635 return 2;
3637 /* The displacement is probably out of range and will require extra
3638 calculations. */
3639 return 3;
3642 /* 'reg + reg' addressing. Account a slightly higher cost because of
3643 increased pressure on R0. */
3644 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3645 return 3;
3647 /* Not sure what it is - probably expensive. */
3648 return 10;
3651 /* Code to expand a shift. */
3652 static void
3653 gen_ashift (int type, int n, rtx reg)
3655 rtx n_rtx;
3657 /* Negative values here come from the shift_amounts array. */
3658 if (n < 0)
3660 if (type == ASHIFT)
3661 type = LSHIFTRT;
3662 else
3663 type = ASHIFT;
3664 n = -n;
3667 n_rtx = GEN_INT (n);
3668 gcc_assert (satisfies_constraint_P27 (n_rtx));
3670 switch (type)
3672 case ASHIFTRT:
3673 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3674 break;
3675 case LSHIFTRT:
3676 if (n == 1)
3677 emit_insn (gen_shlr (reg, reg));
3678 else
3679 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3680 break;
3681 case ASHIFT:
3682 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3683 break;
3684 default:
3685 gcc_unreachable ();
3689 /* Code to expand a HImode shift. */
3690 static void
3691 gen_ashift_hi (int type, int n, rtx reg)
3693 /* Negative values here come from the shift_amounts array. */
3694 if (n < 0)
3696 if (type == ASHIFT)
3697 type = LSHIFTRT;
3698 else
3699 type = ASHIFT;
3700 n = -n;
3703 switch (type)
3705 case ASHIFTRT:
3706 case LSHIFTRT:
3707 /* We don't have HImode right shift operations because using the
3708 ordinary 32 bit shift instructions for that doesn't generate proper
3709 zero/sign extension.
3710 gen_ashift_hi is only called in contexts where we know that the
3711 sign extension works out correctly. */
3713 int offset = 0;
3714 if (GET_CODE (reg) == SUBREG)
3716 offset = SUBREG_BYTE (reg);
3717 reg = SUBREG_REG (reg);
3719 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3720 break;
3722 case ASHIFT:
3723 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3724 break;
3728 /* Output RTL to split a constant shift into its component SH constant
3729 shift instructions. */
3730 void
3731 gen_shifty_op (int code, rtx *operands)
3733 int value = INTVAL (operands[2]);
3734 int max, i;
3736 /* Truncate the shift count in case it is out of bounds. */
3737 value = value & 31;
3739 if (value == 31)
3741 if (code == LSHIFTRT)
3743 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3744 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3745 return;
3747 else if (code == ASHIFT)
3749 /* There is a two instruction sequence for 31 bit left shifts,
3750 but it requires r0. */
3751 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3753 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3754 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3755 return;
3759 else if (value == 0)
3761 /* This can happen even when optimizing, if there were subregs before
3762 reload. Don't output a nop here, as this is never optimized away;
3763 use a no-op move instead. */
3764 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3765 return;
3768 max = ashl_lshr_seq[value].insn_count;
3769 for (i = 0; i < max; i++)
3770 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3773 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3774 don't matter. */
3775 void
3776 gen_shifty_hi_op (int code, rtx *operands)
3778 int value = INTVAL (operands[2]);
3779 int max, i;
3780 void (*gen_fun) (int, int, rtx);
3782 /* This operation is used by and_shl for SImode values with a few
3783 high bits known to be cleared. */
3784 value &= 31;
3785 if (value == 0)
3787 emit_insn (gen_nop ());
3788 return;
3791 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3792 if (code == ASHIFT)
3794 max = ext_ashl_lshr_seq[value].insn_count;
3795 for (i = 0; i < max; i++)
3796 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3798 else
3799 /* When shifting right, emit the shifts in reverse order, so that
3800 solitary negative values come first. */
3801 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3802 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3805 /* Output RTL for an arithmetic right shift.
3806 ??? Rewrite to use super-optimizer sequences. */
3807 bool
3808 expand_ashiftrt (rtx *operands)
3810 rtx wrk;
3811 char func[18];
3812 int value;
3814 if (TARGET_DYNSHIFT)
3816 if (!CONST_INT_P (operands[2]))
3818 rtx count = copy_to_mode_reg (SImode, operands[2]);
3819 emit_insn (gen_negsi2 (count, count));
3820 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3821 return true;
3823 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3824 > 1 + SH_DYNAMIC_SHIFT_COST)
3826 rtx count
3827 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3828 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3829 return true;
3832 if (!CONST_INT_P (operands[2]))
3833 return false;
3835 value = INTVAL (operands[2]) & 31;
3837 if (value == 31)
3839 /* If we are called from abs expansion, arrange things so that we
3840 we can use a single MT instruction that doesn't clobber the source,
3841 if LICM can hoist out the load of the constant zero. */
3842 if (currently_expanding_to_rtl)
3844 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3845 operands[1]));
3846 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3847 return true;
3849 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3850 return true;
3852 else if (value >= 16 && value <= 19)
3854 wrk = gen_reg_rtx (SImode);
3855 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3856 value -= 16;
3857 while (value--)
3858 gen_ashift (ASHIFTRT, 1, wrk);
3859 emit_move_insn (operands[0], wrk);
3860 return true;
3862 /* Expand a short sequence inline, longer call a magic routine. */
3863 else if (value <= 5)
3865 wrk = gen_reg_rtx (SImode);
3866 emit_move_insn (wrk, operands[1]);
3867 while (value--)
3868 gen_ashift (ASHIFTRT, 1, wrk);
3869 emit_move_insn (operands[0], wrk);
3870 return true;
3873 wrk = gen_reg_rtx (Pmode);
3875 /* Load the value into an arg reg and call a helper. */
3876 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3877 sprintf (func, "__ashiftrt_r4_%d", value);
3878 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3879 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3880 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3881 return true;
3884 /* Try to find a good way to implement the combiner pattern
3885 [(set (match_operand:SI 0 "register_operand" "r")
3886 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3887 (match_operand:SI 2 "const_int_operand" "n"))
3888 (match_operand:SI 3 "const_int_operand" "n"))) .
3889 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3890 return 0 for simple right / left or left/right shift combination.
3891 return 1 for a combination of shifts with zero_extend.
3892 return 2 for a combination of shifts with an AND that needs r0.
3893 return 3 for a combination of shifts with an AND that needs an extra
3894 scratch register, when the three highmost bits of the AND mask are clear.
3895 return 4 for a combination of shifts with an AND that needs an extra
3896 scratch register, when any of the three highmost bits of the AND mask
3897 is set.
3898 If ATTRP is set, store an initial right shift width in ATTRP[0],
3899 and the instruction length in ATTRP[1] . These values are not valid
3900 when returning 0.
3901 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3902 shift_amounts for the last shift value that is to be used before the
3903 sign extend. */
3905 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3907 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3908 int left = INTVAL (left_rtx), right;
3909 int best = 0;
3910 int cost, best_cost = 10000;
3911 int best_right = 0, best_len = 0;
3912 int i;
3913 int can_ext;
3915 if (left < 0 || left > 31)
3916 return 0;
3917 if (CONST_INT_P (mask_rtx))
3918 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3919 else
3920 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3921 /* Can this be expressed as a right shift / left shift pair? */
3922 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3923 right = exact_log2 (lsb);
3924 mask2 = ~(mask + lsb - 1);
3925 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3926 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3927 if (! mask2)
3928 best_cost = ashl_lshr_seq[right].insn_count
3929 + ashl_lshr_seq[right + left].insn_count;
3930 /* mask has no trailing zeroes <==> ! right */
3931 else if (! right && mask2 == ~(lsb2 - 1))
3933 int late_right = exact_log2 (lsb2);
3934 best_cost = ashl_lshr_seq[left + late_right].insn_count
3935 + ashl_lshr_seq[late_right].insn_count;
3937 /* Try to use zero extend. */
3938 if (mask2 == ~(lsb2 - 1))
3940 int width, first;
3942 for (width = 8; width <= 16; width += 8)
3944 /* Can we zero-extend right away? */
3945 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3947 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3948 + ext_ashl_lshr_seq[left + right].insn_count;
3949 if (cost < best_cost)
3951 best = 1;
3952 best_cost = cost;
3953 best_right = right;
3954 best_len = cost;
3955 if (attrp)
3956 attrp[2] = -1;
3958 continue;
3960 /* ??? Could try to put zero extend into initial right shift,
3961 or even shift a bit left before the right shift. */
3962 /* Determine value of first part of left shift, to get to the
3963 zero extend cut-off point. */
3964 first = width - exact_log2 (lsb2) + right;
3965 if (first >= 0 && right + left - first >= 0)
3967 cost = ext_ashl_lshr_seq[right].insn_count
3968 + ext_ashl_lshr_seq[first].insn_count + 1
3969 + ext_ashl_lshr_seq[right + left - first].insn_count;
3971 if (cost < best_cost)
3973 best = 1;
3974 best_cost = cost;
3975 best_right = right;
3976 best_len = cost;
3977 if (attrp)
3978 attrp[2] = first;
3983 /* Try to use r0 AND pattern */
3984 for (i = 0; i <= 2; i++)
3986 if (i > right)
3987 break;
3988 if (! CONST_OK_FOR_K08 (mask >> i))
3989 continue;
3990 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
3991 if (cost < best_cost)
3993 best = 2;
3994 best_cost = cost;
3995 best_right = i;
3996 best_len = cost - 1;
3999 /* Try to use a scratch register to hold the AND operand. */
4000 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4001 for (i = 0; i <= 2; i++)
4003 if (i > right)
4004 break;
4005 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4006 + (can_ext
4007 ? ext_ashl_lshr_seq
4008 : ashl_lshr_seq)[left + i].insn_count;
4009 if (cost < best_cost)
4011 best = 4 - can_ext;
4012 best_cost = cost;
4013 best_right = i;
4014 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4018 if (attrp)
4020 attrp[0] = best_right;
4021 attrp[1] = best_len;
4023 return best;
4026 /* This is used in length attributes of the unnamed instructions
4027 corresponding to shl_and_kind return values of 1 and 2. */
4029 shl_and_length (rtx insn)
4031 rtx set_src, left_rtx, mask_rtx;
4032 int attributes[3];
4034 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4035 left_rtx = XEXP (XEXP (set_src, 0), 1);
4036 mask_rtx = XEXP (set_src, 1);
4037 shl_and_kind (left_rtx, mask_rtx, attributes);
4038 return attributes[1];
4041 /* This is used in length attribute of the and_shl_scratch instruction. */
4043 shl_and_scr_length (rtx insn)
4045 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4046 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4047 rtx op = XEXP (set_src, 0);
4048 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4049 op = XEXP (XEXP (op, 0), 0);
4050 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4053 /* Generate rtl for instructions for which shl_and_kind advised a particular
4054 method of generating them, i.e. returned zero. */
/* DEST receives the result and SOURCE is the input; LEFT_RTX and MASK_RTX
   are the constant left shift count and AND mask that are handed to
   shl_and_kind.  Returns false after emitting insns for kinds 1 to 4 and
   true, with nothing emitted, for any other kind.
   NOTE(review): the comment above says "returned zero", but the switch
   below acts on kinds 1 to 4 and bails out on everything else -- confirm
   which wording is intended.  */
4055 bool
4056 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4058 int attributes[3];
4059 unsigned HOST_WIDE_INT mask;
4060 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4061 int right, total_shift;
/* Final left shift generator; replaced by gen_shifty_op for kind 4.  */
4062 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
/* attributes[0] is the right shift count chosen by shl_and_kind.  MASK is
   the AND mask moved down by the combined shift count.  */
4064 right = attributes[0];
4065 total_shift = INTVAL (left_rtx) + right;
4066 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4067 switch (kind)
4069 default:
4070 return true;
/* Kind 1: the AND is done with a QImode or HImode zero extension.  */
4071 case 1:
/* NOTE(review): attributes[2] is presumably filled in by shl_and_kind for
   this kind; the assignment is not visible from here.  */
4073 int first = attributes[2];
4074 rtx operands[3];
/* Negative FIRST: zero extend SOURCE straight away, choosing the width
   from MASK moved back up by RIGHT.  */
4076 if (first < 0)
4078 emit_insn ((mask << right) <= 0xff
4079 ? gen_zero_extendqisi2 (dest,
4080 gen_lowpart (QImode, source))
4081 : gen_zero_extendhisi2 (dest,
4082 gen_lowpart (HImode, source)));
4083 source = dest;
4085 if (source != dest)
4086 emit_insn (gen_movsi (dest, source));
4087 operands[0] = dest;
4088 if (right)
4090 operands[2] = GEN_INT (right);
4091 gen_shifty_hi_op (LSHIFTRT, operands);
/* Positive FIRST: shift left by FIRST before extending, and account for
   that in both the remaining shift count and the mask.  */
4093 if (first > 0)
4095 operands[2] = GEN_INT (first);
4096 gen_shifty_hi_op (ASHIFT, operands);
4097 total_shift -= first;
4098 mask <<= first;
/* Non-negative FIRST: the zero extension happens here, after the shifts
   above.  */
4100 if (first >= 0)
4101 emit_insn (mask <= 0xff
4102 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4103 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4104 if (total_shift > 0)
4106 operands[2] = GEN_INT (total_shift);
4107 gen_shifty_hi_op (ASHIFT, operands);
4109 break;
4111 case 4:
4112 shift_gen_fun = gen_shifty_op;
4113 /* FALLTHRU */
4114 case 3:
4115 /* If the topmost bit that matters is set, set the topmost bits
4116 that don't matter. This way, we might be able to get a shorter
4117 signed constant. */
4118 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4119 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
4120 /* FALLTHRU */
4121 case 2:
4122 /* Don't expand fine-grained when combining, because that will
4123 make the pattern fail. */
4124 if (currently_expanding_to_rtl
4125 || reload_in_progress || reload_completed)
4127 rtx operands[3];
4129 /* Cases 3 and 4 should be handled by this split
4130 only while combining */
4131 gcc_assert (kind <= 2);
/* Fine-grained expansion: optional logical right shift, the AND with the
   shifted-down mask, then the left shift by TOTAL_SHIFT.  */
4132 if (right)
4134 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4135 source = dest;
4137 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4138 if (total_shift)
4140 operands[0] = dest;
4141 operands[1] = dest;
4142 operands[2] = GEN_INT (total_shift);
4143 shift_gen_fun (ASHIFT, operands);
4145 break;
4147 else
/* Otherwise emit a single and_shl_scratch insn.  NEG is derived from
   entries 1 and 2 of ext_ashl_lshr_seq[total_shift].amount and is passed
   both added to TOTAL_SHIFT and on its own.  */
4149 int neg = 0;
4150 if (kind != 4 && total_shift < 16)
4152 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4153 if (neg > 0)
4154 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4155 else
4156 neg = 0;
4158 emit_insn (gen_and_shl_scratch (dest, source,
4159 GEN_INT (right),
4160 GEN_INT (mask),
4161 GEN_INT (total_shift + neg),
4162 GEN_INT (neg)));
4163 emit_insn (gen_movsi (dest, dest));
4164 break;
4167 return false;
4170 /* Try to find a good way to implement the combiner pattern
4171 [(set (match_operand:SI 0 "register_operand" "=r")
4172 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4173 (match_operand:SI 2 "const_int_operand" "n")
4174 (match_operand:SI 3 "const_int_operand" "n")
4175 (const_int 0)))
4176 (clobber (reg:SI T_REG))]
4177 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4178 return 0 for simple left / right shift combination.
4179 return 1 for left shift / 8 bit sign extend / left shift.
4180 return 2 for left shift / 16 bit sign extend / left shift.
4181 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4182 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4183 return 5 for left shift / 16 bit sign extend / right shift
4184 return 6 for < 8 bit sign extend / left shift.
4185 return 7 for < 8 bit sign extend / left shift / single right shift.
4186 If COSTP is nonzero, assign the calculated cost to *COSTP. */
/* Each candidate below is priced in insns from the ashl_lshr_seq and
   ext_ashl_lshr_seq tables; KIND and BEST_COST track the cheapest one
   seen so far.  */
4188 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4190 int left, size, insize, ext;
4191 int cost = 0, best_cost;
4192 int kind;
4194 left = INTVAL (left_rtx);
4195 size = INTVAL (size_rtx);
/* INSIZE is the width of the field before it is shifted left by LEFT.  */
4196 insize = size - left;
4197 gcc_assert (insize > 0);
4198 /* Default to left / right shift. */
4199 kind = 0;
4200 best_cost = ashl_lshr_seq[32 - insize].insn_count
4201 + ashl_lshr_seq[32 - size].insn_count;
4202 if (size <= 16)
4204 /* 16 bit shift / sign extend / 16 bit shift */
4205 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4206 + ashl_lshr_seq[16 - size].insn_count;
4207 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4208 below, by alternative 3 or something even better. */
4209 if (cost < best_cost)
4211 kind = 5;
4212 best_cost = cost;
4215 /* Try a plain sign extend between two shifts. */
/* EXT takes the values 16 and then 8, as long as it is at least INSIZE.  */
4216 for (ext = 16; ext >= insize; ext -= 8)
4218 if (ext <= size)
4220 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4221 + ashl_lshr_seq[size - ext].insn_count;
4222 if (cost < best_cost)
/* ext / 8 is 1 for the 8 bit extend and 2 for the 16 bit extend.  */
4224 kind = ext / (unsigned) 8;
4225 best_cost = cost;
4228 /* Check if we can do a sloppy shift with a final signed shift
4229 restoring the sign. */
4230 if (EXT_SHIFT_SIGNED (size - ext))
4231 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4232 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4233 /* If not, maybe it's still cheaper to do the second shift sloppy,
4234 and do a final sign extend? */
4235 else if (size <= 16)
4236 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4237 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4238 + 1;
4239 else
4240 continue;
4241 if (cost < best_cost)
/* ext / 8 + 2 is 3 for the 8 bit extend and 4 for the 16 bit extend.  */
4243 kind = ext / (unsigned) 8 + 2;
4244 best_cost = cost;
4247 /* Check if we can sign extend in r0 */
4248 if (insize < 8)
4250 cost = 3 + ashl_lshr_seq[left].insn_count;
4251 if (cost < best_cost)
4253 kind = 6;
4254 best_cost = cost;
4256 /* Try the same with a final signed shift. */
4257 if (left < 31)
4259 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4260 if (cost < best_cost)
4262 kind = 7;
4263 best_cost = cost;
4267 if (TARGET_DYNSHIFT)
4269 /* Try to use a dynamic shift. */
4270 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4271 if (cost < best_cost)
4273 kind = 0;
4274 best_cost = cost;
/* NOTE(review): *COSTP receives COST, i.e. the most recently evaluated
   candidate, not BEST_COST; it is still 0 when no candidate after the
   default was evaluated.  Confirm this is intended before relying on it
   as the cost of the returned kind.  */
4277 if (costp)
4278 *costp = cost;
4279 return kind;
4282 /* Function to be used in the length attribute of the instructions
4283 implementing this pattern. */
4285 shl_sext_length (rtx insn)
4287 rtx set_src, left_rtx, size_rtx;
4288 int cost;
4290 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4291 left_rtx = XEXP (XEXP (set_src, 0), 1);
4292 size_rtx = XEXP (set_src, 1);
4293 shl_sext_kind (left_rtx, size_rtx, &cost);
4294 return cost;
4297 /* Generate rtl for this pattern */
/* DEST receives the result and SOURCE is the input; LEFT_RTX and SIZE_RTX
   are the constant left shift count and field size passed to
   shl_sext_kind.  Returns false after emitting insns for kinds 1 to 7 and
   true, with nothing emitted, for any other kind.  */
4298 bool
4299 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4301 int kind;
4302 int left, size, insize, cost;
4303 rtx operands[3];
4305 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4306 left = INTVAL (left_rtx);
4307 size = INTVAL (size_rtx);
4308 insize = size - left;
4309 switch (kind)
4311 case 1:
4312 case 2:
4313 case 3:
4314 case 4:
/* Odd kinds (1 and 3) use the 8 bit sign extend, even kinds (2 and 4)
   the 16 bit one.  SHIFT2 is the shift still needed after the extend and
   may be negative for kinds 3 and 4.  */
4316 int ext = kind & 1 ? 8 : 16;
4317 int shift2 = size - ext;
4319 /* Don't expand fine-grained when combining, because that will
4320 make the pattern fail. */
4321 if (! currently_expanding_to_rtl
4322 && ! reload_in_progress && ! reload_completed)
4324 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4325 emit_insn (gen_movsi (dest, source));
4326 break;
4328 if (dest != source)
4329 emit_insn (gen_movsi (dest, source));
4330 operands[0] = dest;
/* First shift: move the field up so that it fills EXT bits.  */
4331 if (ext - insize)
4333 operands[2] = GEN_INT (ext - insize);
4334 gen_shifty_hi_op (ASHIFT, operands);
4336 emit_insn (kind & 1
4337 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4338 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
/* Kinds 1 and 2 finish with a plain left shift by SHIFT2.  */
4339 if (kind <= 2)
4341 if (shift2)
4343 operands[2] = GEN_INT (shift2);
4344 gen_shifty_op (ASHIFT, operands);
4347 else
4349 if (shift2 > 0)
/* Shift one bit too far left and come back with a one bit arithmetic
   right shift.  */
4351 if (EXT_SHIFT_SIGNED (shift2))
4353 operands[2] = GEN_INT (shift2 + 1);
4354 gen_shifty_op (ASHIFT, operands);
4355 operands[2] = const1_rtx;
4356 gen_shifty_op (ASHIFTRT, operands);
4357 break;
4359 operands[2] = GEN_INT (shift2);
4360 gen_shifty_hi_op (ASHIFT, operands);
/* Negative SHIFT2: shift right by its magnitude instead.  */
4362 else if (shift2)
4364 operands[2] = GEN_INT (-shift2);
4365 gen_shifty_hi_op (LSHIFTRT, operands);
/* Final sign extend, with the width chosen from SIZE.  */
4367 emit_insn (size <= 8
4368 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4369 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4371 break;
4373 case 5:
/* I counts the one bit arithmetic right shifts emitted by the loop
   below.  */
4375 int i = 16 - size;
4376 if (! currently_expanding_to_rtl
4377 && ! reload_in_progress && ! reload_completed)
4378 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4379 else
4381 operands[0] = dest;
4382 operands[2] = GEN_INT (16 - insize);
4383 gen_shifty_hi_op (ASHIFT, operands);
4384 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4386 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4387 while (--i >= 0)
4388 gen_ashift (ASHIFTRT, 1, dest);
4389 break;
4391 case 6:
4392 case 7:
4393 /* Don't expand fine-grained when combining, because that will
4394 make the pattern fail. */
4395 if (! currently_expanding_to_rtl
4396 && ! reload_in_progress && ! reload_completed)
4398 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4399 emit_insn (gen_movsi (dest, source));
4400 break;
/* Sign extend the low INSIZE bits arithmetically: mask them out, then xor
   with and subtract the value of the field's top bit.  */
4402 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4403 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4404 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
4405 operands[0] = dest;
/* Kind 7 shifts one bit further left and undoes it with a one bit
   arithmetic right shift.  */
4406 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4407 gen_shifty_op (ASHIFT, operands);
4408 if (kind == 7)
4409 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4410 break;
4411 default:
4412 return true;
4414 return false;
/* Node of a singly linked list of code labels; label_ref_list_t is a
   pointer to such a node.  */
4417 typedef struct label_ref_list_d
4419 rtx_code_label *label;
4420 struct label_ref_list_d *next;
4421 } *label_ref_list_t;
/* Allocator from which add_constant obtains its list nodes.  */
4423 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4424 ("label references list");
4426 /* The SH cannot load a large constant into a register, constants have to
4427 come from a pc relative load. The reference of a pc relative load
4428 instruction must be less than 1k in front of the instruction. This
4429 means that we often have to dump a constant inside a function, and
4430 generate code to branch around it.
4432 It is important to minimize this, since the branches will slow things
4433 down and make things bigger.
4435 Worst case code looks like:
4437 mov.l L1,rn
4438 bra L2
4440 align
4441 L1: .long value
4445 mov.l L3,rn
4446 bra L4
4448 align
4449 L3: .long value
4453 We fix this by performing a scan before scheduling, which notices which
4454 instructions need to have their operands fetched from the constant table
4455 and builds the table.
4457 The algorithm is:
4459 scan, find an instruction which needs a pcrel move. Look forward, find the
4460 last barrier which is within MAX_COUNT bytes of the requirement.
4461 If there isn't one, make one. Process all the instructions between
4462 the find and the barrier.
4464 In the above example, we can tell that L3 is within 1k of L1, so
4465 the first move can be shrunk from the 3 insn+constant sequence into
4466 just 1 insn, and the constant moved to L3 to make:
4468 mov.l L1,rn
4470 mov.l L3,rn
4471 bra L4
4473 align
4474 L3:.long value
4475 L4:.long value
4477 Then the second move becomes the target for the shortening process. */
/* One constant pool entry; the element type of pool_vector.  dump_table
   emits a consttable_window_end insn for every label on the WEND list
   right after the entry's constant.  */
4479 typedef struct
4481 rtx value; /* Value in table. */
4482 rtx_code_label *label; /* Label of value. */
4483 label_ref_list_t wend; /* End of window. */
4484 machine_mode mode; /* Mode of value. */
4486 /* True if this constant is accessed as part of a post-increment
4487 sequence. Note that HImode constants are never accessed in this way. */
4488 bool part_of_sequence_p;
4489 } pool_node;
4491 /* The maximum number of constants that can fit into one pool, since
4492 constants in the range 0..510 are at least 2 bytes long, and in the
4493 range from there to 1018 at least 4 bytes. */
4495 #define MAX_POOL_SIZE 372
/* The pending pool entries: add_constant appends to this array and
   dump_table emits its contents.  */
4496 static pool_node pool_vector[MAX_POOL_SIZE];
/* Number of entries of pool_vector currently in use; dump_table resets it
   to zero.  */
4497 static int pool_size;
/* The label most recently created by add_constant, or null if there is
   none since the last dump_table.  */
4498 static rtx_code_label *pool_window_label;
/* Index into pool_vector of the entry most recently added or reused by
   add_constant.  */
4499 static int pool_window_last;
/* find_barrier treats labels numbered at or below this value as having
   existed when alignments were computed.  Assigned outside this part of
   the file.  */
4501 static int max_labelno_before_reorg;
4503 /* ??? If we need a constant in HImode which is the truncated value of a
4504 constant we need in SImode, we could combine the two entries thus saving
4505 two bytes. Is this common enough to be worth the effort of implementing
4506 it? */
4508 /* ??? This stuff should be done at the same time that we shorten branches.
4509 As it is now, we must assume that all branches are the maximum size, and
4510 this causes us to almost always output constant pools sooner than
4511 necessary. */
4513 /* Add a constant to the pool and return its label. */
4514 static rtx_code_label *
4515 add_constant (rtx x, machine_mode mode, rtx last_value)
4517 rtx_code_label *lab, *new_rtx;
4518 label_ref_list_t ref, newref;
4520 /* First see if we've already got it. */
4521 for (int i = 0; i < pool_size; i++)
4523 if (x->code == pool_vector[i].value->code
4524 && mode == pool_vector[i].mode)
4526 if (x->code == CODE_LABEL)
4528 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4529 continue;
4531 if (rtx_equal_p (x, pool_vector[i].value))
4533 lab = new_rtx = 0;
4534 if (! last_value
4535 || ! i
4536 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4538 new_rtx = gen_label_rtx ();
4539 LABEL_REFS (new_rtx) = pool_vector[i].label;
4540 pool_vector[i].label = lab = new_rtx;
4542 if (lab && pool_window_label)
4544 newref = label_ref_list_d_pool.allocate ();
4545 newref->label = pool_window_label;
4546 ref = pool_vector[pool_window_last].wend;
4547 newref->next = ref;
4548 pool_vector[pool_window_last].wend = newref;
4550 if (new_rtx)
4551 pool_window_label = new_rtx;
4552 pool_window_last = i;
4553 return lab;
4558 /* Need a new one. */
4559 pool_vector[pool_size].value = x;
4560 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4562 lab = 0;
4563 pool_vector[pool_size - 1].part_of_sequence_p = true;
4565 else
4566 lab = gen_label_rtx ();
4567 pool_vector[pool_size].mode = mode;
4568 pool_vector[pool_size].label = lab;
4569 pool_vector[pool_size].wend = NULL;
4570 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4571 if (lab && pool_window_label)
4573 newref = label_ref_list_d_pool.allocate ();
4574 newref->label = pool_window_label;
4575 ref = pool_vector[pool_window_last].wend;
4576 newref->next = ref;
4577 pool_vector[pool_window_last].wend = newref;
4579 if (lab)
4580 pool_window_label = lab;
4581 pool_window_last = pool_size;
4582 pool_size++;
4583 return lab;
4586 /* Output the literal table. START, if nonzero, is the first instruction
4587 this table is needed for, and also indicates that there is at least one
4588 casesi_worker_2 instruction; We have to emit the operand3 labels from
4589 these insns at a 4-byte aligned position. BARRIER is the barrier
4590 after which we are to place the table. */
/* Everything is emitted after SCAN, which starts at BARRIER and advances
   with each emitted insn or label.  On exit the pool is empty and the
   window state is reset.  */
4591 static void
4592 dump_table (rtx_insn *start, rtx_insn *barrier)
4594 rtx_insn *scan = barrier;
4595 bool need_align = true;
4596 rtx_code_label *lab;
4597 label_ref_list_t ref;
4598 bool have_df = false;
4600 /* Do two passes, first time dump out the HI sized constants. */
4602 for (int i = 0; i < pool_size; i++)
4604 pool_node *p = &pool_vector[i];
4606 if (p->mode == HImode)
/* A single 2 byte alignment precedes the first HImode constant.  */
4608 if (need_align)
4610 scan = emit_insn_after (gen_align_2 (), scan);
4611 need_align = false;
/* Emit every label chained to this entry through LABEL_REFS, then the
   constant, then its window end markers.  */
4613 for (lab = p->label; lab;
4614 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4615 scan = emit_label_after (lab, scan);
4616 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4617 scan);
4618 for (ref = p->wend; ref; ref = ref->next)
4620 lab = ref->label;
4621 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
/* Remember whether any DFmode constant is present for the test further
   down.  */
4624 else if (p->mode == DFmode)
4625 have_df = true;
4628 need_align = true;
/* With START given, align to 4 bytes and emit the label found in vector
   element 3 of the source of every casesi_worker_2 insn between START
   and BARRIER.  */
4630 if (start)
4632 scan = emit_insn_after (gen_align_4 (), scan);
4633 need_align = false;
4634 for (; start != barrier; start = NEXT_INSN (start))
4635 if (NONJUMP_INSN_P (start)
4636 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4638 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4639 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4641 scan = emit_label_after (as_a <rtx_insn *> (lab), scan);
/* Variant used when DFmode constants are present and doubles are to be
   aligned: the table is started on an 8 byte boundary.  */
4644 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
/* ALIGN_INSN is the alignment insn most recently emitted in front of a
   DFmode constant, for as long as no 4 byte constant has been placed
   before it.  */
4646 rtx_insn *align_insn = NULL;
4648 scan = emit_label_after (gen_label_rtx (), scan);
4649 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4650 need_align = false;
4652 for (int i = 0; i < pool_size; i++)
4654 pool_node *p = &pool_vector[i];
4656 switch (p->mode)
4658 case E_HImode:
4659 break;
4660 case E_SImode:
4661 case E_SFmode:
/* A 4 byte constant that is not part of a sequence is emitted in front
   of the pending alignment insn, which is then deleted.  */
4662 if (align_insn && !p->part_of_sequence_p)
4664 for (lab = p->label; lab;
4665 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4666 emit_label_before (lab, align_insn);
4667 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4668 align_insn);
4669 for (ref = p->wend; ref; ref = ref->next)
4671 lab = ref->label;
4672 emit_insn_before (gen_consttable_window_end (lab),
4673 align_insn);
4675 delete_insn (align_insn);
4676 align_insn = NULL;
4677 continue;
4679 else
4681 for (lab = p->label; lab;
4682 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4683 scan = emit_label_after (lab, scan);
4684 scan = emit_insn_after (gen_consttable_4 (p->value,
4685 const0_rtx), scan);
/* Each 4 byte constant appended here toggles whether the next DFmode
   constant needs an alignment insn.  */
4686 need_align = ! need_align;
4688 break;
4689 case E_DFmode:
4690 if (need_align)
4692 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4693 align_insn = scan;
4694 need_align = false;
4696 /* FALLTHRU */
4697 case E_DImode:
4698 for (lab = p->label; lab;
4699 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4700 scan = emit_label_after (lab, scan);
4701 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4702 scan);
4703 break;
4704 default:
4705 gcc_unreachable ();
4708 if (p->mode != HImode)
4710 for (ref = p->wend; ref; ref = ref->next)
4712 lab = ref->label;
4713 scan = emit_insn_after (gen_consttable_window_end (lab),
4714 scan);
/* Once pool_size is cleared here the loop below iterates zero times.  */
4719 pool_size = 0;
/* Generic second pass: every non-HImode constant in pool order, with one
   label and 4 byte alignment emitted ahead of the first of them if
   alignment is still needed.  */
4722 for (int i = 0; i < pool_size; i++)
4724 pool_node *p = &pool_vector[i];
4726 switch (p->mode)
4728 case E_HImode:
4729 break;
4730 case E_SImode:
4731 case E_SFmode:
4732 if (need_align)
4734 need_align = false;
4735 scan = emit_label_after (gen_label_rtx (), scan);
4736 scan = emit_insn_after (gen_align_4 (), scan);
4738 for (lab = p->label; lab;
4739 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4740 scan = emit_label_after (lab, scan);
4741 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4742 scan);
4743 break;
4744 case E_DFmode:
4745 case E_DImode:
4746 if (need_align)
4748 need_align = false;
4749 scan = emit_label_after (gen_label_rtx (), scan);
4750 scan = emit_insn_after (gen_align_4 (), scan);
4752 for (lab = p->label; lab;
4753 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4754 scan = emit_label_after (lab, scan);
4755 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4756 scan);
4757 break;
4758 default:
4759 gcc_unreachable ();
4762 if (p->mode != HImode)
4764 for (ref = p->wend; ref; ref = ref->next)
4766 lab = ref->label;
4767 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
/* Terminate the table, follow it with a barrier and reset the pool
   state.  */
4772 scan = emit_insn_after (gen_consttable_end (), scan);
4773 scan = emit_barrier_after (scan);
4774 pool_size = 0;
4775 pool_window_label = NULL;
4776 pool_window_last = 0;
/* Element 0 of the operand vector of the source of MOVA's pattern.  The
   pattern is taken to be a single SET; mova_p checks that, and that this
   element is a LABEL_REF, before the macro is used on an insn.  */
4779 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4781 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4783 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4784 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4785 need to fix it if the input value is CONST_OK_FOR_I08. */
/* Returns true only for a non-jump insn whose pattern, or the first
   element of whose PARALLEL pattern, is a SET that passes every test
   below; false otherwise.  */
4786 static bool
4787 broken_move (rtx_insn *insn)
4789 if (NONJUMP_INSN_P (insn))
4791 rtx pat = PATTERN (insn);
4792 if (GET_CODE (pat) == PARALLEL)
4793 pat = XVECEXP (pat, 0, 0);
4794 if (GET_CODE (pat) == SET
4795 /* We can load any 8-bit value if we don't care what the high
4796 order bits end up as. */
4797 && GET_MODE (SET_DEST (pat)) != QImode
/* The source must be a constant, an UNSPECV_SP_SWITCH_B unspec_volatile,
   or a mova whose operand is a CONST.  */
4798 && (CONSTANT_P (SET_SRC (pat))
4799 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4800 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4801 /* Match mova_const. */
4802 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4803 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4804 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
/* Excluded: on TARGET_SH2E, floating point zero or one going into a
   floating point register, subject to the fpscr condition described
   below.  */
4805 && ! (TARGET_SH2E
4806 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4807 && (fp_zero_operand (SET_SRC (pat))
4808 || fp_one_operand (SET_SRC (pat)))
4809 /* In general we don't know the current setting of fpscr, so
4810 disable fldi.
4811 There is an exception if this was a register-register move
4812 before reload - and hence it was ascertained that we have
4813 single precision setting - and in a post-reload optimization
4814 we changed this to do a constant load. In that case
4815 we don't have an r0 clobber, hence we must use fldi. */
4816 && (TARGET_FMOVD
4817 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4818 == SCRATCH))
4819 && REG_P (SET_DEST (pat))
4820 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
/* Excluded: on TARGET_SH2A, SImode sources satisfying constraint I20 or
   I28.  */
4821 && ! (TARGET_SH2A
4822 && GET_MODE (SET_DEST (pat)) == SImode
4823 && (satisfies_constraint_I20 (SET_SRC (pat))
4824 || satisfies_constraint_I28 (SET_SRC (pat))))
/* Excluded: any source satisfying constraint I08.  */
4825 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4826 return true;
4829 return false;
4832 /* Return true if the specified insn is a mova insn. */
4833 static bool
4834 mova_p (rtx_insn *insn)
4836 return (NONJUMP_INSN_P (insn)
4837 && GET_CODE (PATTERN (insn)) == SET
4838 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4839 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4840 /* Don't match mova_const. */
4841 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4844 /* Fix up a mova from a switch that went out of range. */
/* MOVA's pattern is rewritten in place and its INSN_CODE reset to -1.
   Without flag_pic the source simply becomes the label reference the mova
   wrapped.  With flag_pic the following casesi_worker_1 insn is turned
   into a casesi_worker_2 that refers to a new label, and the mova source
   becomes a CONST of an UNSPEC_SYMOFF built from the original target and
   that label.  */
4845 static void
4846 fixup_mova (rtx_insn *mova)
/* NOTE(review): the referenced label is given QImode here; the reason is
   not visible from this function -- confirm against the users of the
   label's mode.  */
4848 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4849 if (! flag_pic)
4851 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4852 INSN_CODE (mova) = -1;
4854 else
4856 rtx_insn *worker = mova;
4857 rtx_code_label *lab = gen_label_rtx ();
4858 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
/* Walk forward to the casesi_worker_1 insn, skipping notes; no label or
   jump may be met on the way.  */
4862 worker = NEXT_INSN (worker);
4863 gcc_assert (worker
4864 && !LABEL_P (worker)
4865 && !JUMP_P (worker));
4866 } while (NOTE_P (worker)
4867 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4868 wpat = PATTERN (worker);
4869 wpat0 = XVECEXP (wpat, 0, 0);
4870 wpat1 = XVECEXP (wpat, 0, 1);
4871 wsrc = SET_SRC (wpat0);
/* Rebuild the worker as casesi_worker_2 from the pieces of the old
   pattern plus the new label LAB.  */
4872 PATTERN (worker) = (gen_casesi_worker_2
4873 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4874 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4875 XEXP (wpat1, 0)));
4876 INSN_CODE (worker) = -1;
/* The mova source becomes (const (unspec [target, (label_ref LAB)]
   UNSPEC_SYMOFF)).  */
4877 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4878 base = gen_rtx_LABEL_REF (Pmode, lab);
4879 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4880 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4881 INSN_CODE (mova) = -1;
4885 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4886 *num_mova, and check if the new mova is not nested within the first one.
4887 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4888 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
/* "Replaced" means that fixup_mova was applied to the insn.  Insn
   addresses are consulted only when optimizing.  */
4889 static int
4890 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4892 int n_addr = 0; /* Initialization to shut up spurious warning. */
4893 int f_target, n_target = 0; /* Likewise. */
4895 if (optimize)
4897 /* If NEW_MOVA has no address yet, it will be handled later. */
4898 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4899 return -1;
/* N_ADDR is the address of NEW_MOVA and N_TARGET the address of the label
   it refers to.  A target behind the mova, or more than 1022 ahead of it,
   is treated as out of range.  */
4901 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4902 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4903 if (n_addr > n_target || n_addr + 1022 < n_target)
4905 /* Change the mova into a load.
4906 broken_move will then return true for it. */
4907 fixup_mova (new_mova);
4908 return 1;
/* The first mova counted becomes *FIRST_MOVA.  */
4911 if (!(*num_mova)++)
4913 *first_mova = new_mova;
4914 return 2;
/* Nothing more to do when not optimizing, or when the first mova's target
   address is not below the new one's.  */
4916 if (!optimize
4917 || ((f_target
4918 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4919 >= n_target))
4920 return -1;
/* One of the two movas is converted below, so undo the count; the one
   with the larger distance to its target is the one converted.  */
4922 (*num_mova)--;
4923 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4924 > n_target - n_addr)
4926 fixup_mova (*first_mova);
4927 return 0;
4929 else
4931 fixup_mova (new_mova);
4932 return 1;
4936 /* Find the last barrier from insn FROM which is close enough to hold the
4937 constant pool. If we can't find one, then create one near the end of
4938 the range. */
4939 static rtx_insn *
4940 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4942 int count_si = 0;
4943 int count_hi = 0;
4944 int found_hi = 0;
4945 int found_si = 0;
4946 int hi_align = 2;
4947 int si_align = 2;
4948 int leading_mova = num_mova;
4949 rtx_insn *barrier_before_mova = NULL;
4950 rtx_insn *found_barrier = NULL;
4951 rtx_insn *good_barrier = NULL;
4952 int si_limit;
4953 int hi_limit;
4954 rtx_insn *orig = from;
4955 rtx_insn *last_got = NULL;
4956 rtx_insn *last_symoff = NULL;
4958 /* For HImode: range is 510, add 4 because pc counts from address of
4959 second instruction after this one, subtract 2 for the jump instruction
4960 that we may need to emit before the table, subtract 2 for the instruction
4961 that fills the jump delay slot (in very rare cases, reorg will take an
4962 instruction from after the constant pool or will leave the delay slot
4963 empty). This gives 510.
4964 For SImode: range is 1020, add 4 because pc counts from address of
4965 second instruction after this one, subtract 2 in case pc is 2 byte
4966 aligned, subtract 2 for the jump instruction that we may need to emit
4967 before the table, subtract 2 for the instruction that fills the jump
4968 delay slot. This gives 1018. */
4970 /* The branch will always be shortened now that the reference address for
4971 forward branches is the successor address, thus we need no longer make
4972 adjustments to the [sh]i_limit for -O0. */
4974 si_limit = 1018;
4975 hi_limit = 510;
4977 while (from && count_si < si_limit && count_hi < hi_limit)
4979 int inc = get_attr_length (from);
4980 int new_align = 1;
4982 /* If this is a label that existed at the time of the compute_alignments
4983 call, determine the alignment. N.B. When find_barrier recurses for
4984 an out-of-reach mova, we might see labels at the start of previously
4985 inserted constant tables. */
4986 if (LABEL_P (from)
4987 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4989 if (optimize)
4990 new_align = 1 << label_to_alignment (from).levels[0].log;
4991 else if (BARRIER_P (prev_nonnote_insn (from)))
4992 new_align = 1 << barrier_align (from);
4993 else
4994 new_align = 1;
4995 inc = 0;
4997 /* In case we are scanning a constant table because of recursion, check
4998 for explicit alignments. If the table is long, we might be forced
4999 to emit the new table in front of it; the length of the alignment
5000 might be the last straw. */
5001 else if (NONJUMP_INSN_P (from)
5002 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5003 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5004 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5005 /* When we find the end of a constant table, paste the new constant
5006 at the end. That is better than putting it in front because
5007 this way, we don't need extra alignment for adding a 4-byte-aligned
5008 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5009 else if (NONJUMP_INSN_P (from)
5010 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5011 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5012 return from;
5014 if (BARRIER_P (from))
5016 rtx_insn *next;
5018 found_barrier = from;
5020 /* If we are at the end of the function, or in front of an alignment
5021 instruction, we need not insert an extra alignment. We prefer
5022 this kind of barrier. */
5023 if (barrier_align (from) > 2)
5024 good_barrier = from;
5026 /* If we are at the end of a hot/cold block, dump the constants
5027 here. */
5028 next = NEXT_INSN (from);
5029 if (next
5030 && NOTE_P (next)
5031 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5032 break;
5035 if (broken_move (from))
5037 rtx pat, src, dst;
5038 machine_mode mode;
5040 pat = PATTERN (from);
5041 if (GET_CODE (pat) == PARALLEL)
5042 pat = XVECEXP (pat, 0, 0);
5043 src = SET_SRC (pat);
5044 dst = SET_DEST (pat);
5045 mode = GET_MODE (dst);
5047 /* GOT pcrelat setting comes in pair of
5048 mova .L8,r0
5049 mov.l .L8,r12
5050 instructions. (plus add r0,r12).
5051 Remember if we see one without the other. */
5052 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5053 last_got = last_got ? NULL : from;
5054 else if (PIC_ADDR_P (src))
5055 last_got = last_got ? NULL : from;
5057 /* We must explicitly check the mode, because sometimes the
5058 front end will generate code to load unsigned constants into
5059 HImode targets without properly sign extending them. */
5060 if (mode == HImode
5061 || (mode == SImode && satisfies_constraint_I16 (src)
5062 && REGNO (dst) != FPUL_REG))
5064 found_hi += 2;
5065 /* We put the short constants before the long constants, so
5066 we must count the length of short constants in the range
5067 for the long constants. */
5068 /* ??? This isn't optimal, but is easy to do. */
5069 si_limit -= 2;
5071 else
5073 /* We dump DF/DI constants before SF/SI ones, because
5074 the limit is the same, but the alignment requirements
5075 are higher. We may waste up to 4 additional bytes
5076 for alignment, and the DF/DI constant may have
5077 another SF/SI constant placed before it. */
5078 while (si_align > 2 && found_si + si_align - 2 > count_si)
5079 si_align >>= 1;
5080 if (found_si > count_si)
5081 count_si = found_si;
5082 found_si += GET_MODE_SIZE (mode);
5083 if (num_mova)
5084 si_limit -= GET_MODE_SIZE (mode);
5088 if (mova_p (from))
5090 switch (untangle_mova (&num_mova, &mova, from))
5092 case 1:
5093 if (flag_pic)
5095 rtx src = SET_SRC (PATTERN (from));
5096 if (GET_CODE (src) == CONST
5097 && GET_CODE (XEXP (src, 0)) == UNSPEC
5098 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5099 last_symoff = from;
5101 break;
5102 case 0: return find_barrier (0, 0, mova);
5103 case 2:
5105 leading_mova = 0;
5106 barrier_before_mova
5107 = good_barrier ? good_barrier : found_barrier;
5109 default: break;
5111 if (found_si > count_si)
5112 count_si = found_si;
5114 else if (JUMP_TABLE_DATA_P (from)
5115 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5117 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5118 || (num_mova
5119 && (prev_nonnote_insn (from)
5120 == XEXP (MOVA_LABELREF (mova), 0))))
5121 num_mova--;
5122 if (barrier_align (next_real_insn (from)) == align_jumps.levels[0].log)
5124 /* We have just passed the barrier in front of the
5125 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5126 the ADDR_DIFF_VEC is accessed as data, just like our pool
5127 constants, this is a good opportunity to accommodate what
5128 we have gathered so far.
5129 If we waited any longer, we could end up at a barrier in
5130 front of code, which gives worse cache usage for separated
5131 instruction / data caches. */
5132 good_barrier = found_barrier;
5133 break;
5135 else
5137 rtx body = PATTERN (from);
5138 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5141 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5142 else if (JUMP_P (from)
5143 && ! TARGET_SH2
5144 && ! optimize_size)
5145 new_align = 4;
5147 /* There is a possibility that a bf is transformed into a bf/s by the
5148 delay slot scheduler. */
5149 if (JUMP_P (from)
5150 && get_attr_type (from) == TYPE_CBRANCH
5151 && ! sequence_insn_p (from))
5152 inc += 2;
5154 if (found_si)
5156 count_si += inc;
5157 if (new_align > si_align)
5159 si_limit -= (count_si - 1) & (new_align - si_align);
5160 si_align = new_align;
5162 count_si = (count_si + new_align - 1) & -new_align;
5164 if (found_hi)
5166 count_hi += inc;
5167 if (new_align > hi_align)
5169 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5170 hi_align = new_align;
5172 count_hi = (count_hi + new_align - 1) & -new_align;
5174 from = NEXT_INSN (from);
5177 if (num_mova)
5179 if (leading_mova)
5181 /* Try as we might, the leading mova is out of range. Change
5182 it into a load (which will become a pcload) and retry. */
5183 fixup_mova (mova);
5184 return find_barrier (0, 0, mova);
5186 else
5188 /* Insert the constant pool table before the mova instruction,
5189 to prevent the mova label reference from going out of range. */
5190 from = mova;
5191 good_barrier = found_barrier = barrier_before_mova;
5195 if (found_barrier)
5197 if (good_barrier && next_real_insn (found_barrier))
5198 found_barrier = good_barrier;
5200 else
5202 /* We didn't find a barrier in time to dump our stuff,
5203 so we'll make one. */
5204 rtx_code_label *label = gen_label_rtx ();
5206 /* Don't emit a constant table in the middle of insns for
5207 casesi_worker_2. This is a bit overkill but is enough
5208 because casesi_worker_2 wouldn't appear so frequently. */
5209 if (last_symoff)
5210 from = last_symoff;
5212 /* If we exceeded the range, then we must back up over the last
5213 instruction we looked at. Otherwise, we just need to undo the
5214 NEXT_INSN at the end of the loop. */
5215 if (PREV_INSN (from) != orig
5216 && (count_hi > hi_limit || count_si > si_limit))
5217 from = PREV_INSN (PREV_INSN (from));
5218 else
5219 from = PREV_INSN (from);
5221 /* Don't emit a constant table in the middle of global pointer setting,
5222 since that would move the addressing base GOT into another table.
5223 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5224 in the pool anyway, so just move up the whole constant pool.
5226 However, avoid doing so when the last single GOT mov is the starting
5227 insn itself. Going past above the start insn would create a negative
5228 offset, causing errors. */
5229 if (last_got && last_got != orig)
5230 from = PREV_INSN (last_got);
5232 /* Don't insert the constant pool table at the position which
5233 may be the landing pad. */
5234 if (flag_exceptions
5235 && CALL_P (from)
5236 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5237 from = PREV_INSN (from);
5239 /* Walk back to be just before any jump or label.
5240 Putting it before a label reduces the number of times the branch
5241 around the constant pool table will be hit. Putting it before
5242 a jump makes it more likely that the bra delay slot will be
5243 filled. */
5244 while (NOTE_P (from) || JUMP_P (from) || LABEL_P (from))
5245 from = PREV_INSN (from);
5247 if (CALL_P (from))
5249 bool sibcall_p = SIBLING_CALL_P (from);
5251 /* If FROM was a sibling call, then we know that control
5252 will not return. In fact, we were guaranteed to hit
5253 a barrier before another real insn.
5255 The jump around the constant pool is unnecessary. It
5256 costs space, but more importantly it confuses dwarf2cfi
5257 generation. */
5258 if (sibcall_p)
5259 return emit_barrier_after (from);
5262 from = emit_jump_insn_after (gen_jump (label), from);
5263 JUMP_LABEL (from) = label;
5264 LABEL_NUSES (label) = 1;
5265 found_barrier = emit_barrier_after (from);
5266 emit_label_after (label, found_barrier);
5269 return found_barrier;
5272 /* If the instruction INSN is implemented by a special function, and we can
5273 positively find the register that is used to call the sfunc, and this
5274 register is not used anywhere else in this instruction - except as the
5275 destination of a set, return this register; else, return 0. */
5277 sfunc_uses_reg (rtx_insn *insn)
5279 int i;
5280 rtx pattern, part, reg_part, reg;
5282 if (!NONJUMP_INSN_P (insn))
5283 return NULL_RTX;
5284 pattern = PATTERN (insn);
5285 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5286 return NULL_RTX;
5288 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5290 part = XVECEXP (pattern, 0, i);
5291 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5292 reg_part = part;
5294 if (! reg_part)
5295 return NULL_RTX;
5296 reg = XEXP (reg_part, 0);
5297 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5299 part = XVECEXP (pattern, 0, i);
5300 if (part == reg_part || GET_CODE (part) == CLOBBER)
5301 continue;
5302 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5303 && REG_P (SET_DEST (part)))
5304 ? SET_SRC (part) : part)))
5305 return NULL_RTX;
5307 return reg;
5310 /* See if the only way in which INSN uses REG is by calling it, or by
5311 setting it while calling it. Set *SET to a SET rtx if the register
5312 is set by INSN. */
5313 static bool
5314 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5316 *set = NULL_RTX;
5318 rtx reg2 = sfunc_uses_reg (insn);
5319 if (reg2 && REGNO (reg2) == REGNO (reg))
5321 rtx pattern = single_set (insn);
5322 if (pattern
5323 && REG_P (SET_DEST (pattern))
5324 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5325 *set = pattern;
5326 return false;
5328 if (!CALL_P (insn))
5330 /* We don't use rtx_equal_p because we don't care if the mode is
5331 different. */
5332 rtx pattern = single_set (insn);
5333 if (pattern
5334 && REG_P (SET_DEST (pattern))
5335 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5337 rtx par, part;
5338 int i;
5340 *set = pattern;
5341 par = PATTERN (insn);
5342 if (GET_CODE (par) == PARALLEL)
5343 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5345 part = XVECEXP (par, 0, i);
5346 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5347 return true;
5349 return reg_mentioned_p (reg, SET_SRC (pattern));
5352 return true;
5355 rtx pattern = PATTERN (insn);
5357 if (GET_CODE (pattern) == PARALLEL)
5359 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5360 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5361 return true;
5362 pattern = XVECEXP (pattern, 0, 0);
5365 if (GET_CODE (pattern) == SET)
5367 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5369 /* We don't use rtx_equal_p, because we don't care if the
5370 mode is different. */
5371 if (!REG_P (SET_DEST (pattern))
5372 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5373 return true;
5375 *set = pattern;
5378 pattern = SET_SRC (pattern);
5381 if (GET_CODE (pattern) != CALL
5382 || !MEM_P (XEXP (pattern, 0))
5383 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5384 return true;
5386 return false;
5389 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5390 general registers. Bits 0..15 mean that the respective registers
5391 are used as inputs in the instruction. Bits 16..31 mean that the
5392 registers 0..15, respectively, are used as outputs, or are clobbered.
5393 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5395 regs_used (rtx x, int is_dest)
5397 enum rtx_code code;
5398 const char *fmt;
5399 int used = 0;
5401 if (! x)
5402 return used;
5403 code = GET_CODE (x);
5404 switch (code)
5406 case REG:
5407 if (REGNO (x) < 16)
5408 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5409 << (REGNO (x) + is_dest));
5410 return 0;
5411 case SUBREG:
5413 rtx y = SUBREG_REG (x);
5415 if (!REG_P (y))
5416 break;
5417 if (REGNO (y) < 16)
5418 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5419 << (REGNO (y) +
5420 subreg_regno_offset (REGNO (y),
5421 GET_MODE (y),
5422 SUBREG_BYTE (x),
5423 GET_MODE (x)) + is_dest));
5424 return 0;
5426 case SET:
5427 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5428 case RETURN:
5429 /* If there was a return value, it must have been indicated with USE. */
5430 return 0x00ffff00;
5431 case CLOBBER:
5432 is_dest = 1;
5433 break;
5434 case MEM:
5435 is_dest = 0;
5436 break;
5437 case CALL:
5438 used |= 0x00ff00f0;
5439 break;
5440 default:
5441 break;
5444 fmt = GET_RTX_FORMAT (code);
5446 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5448 if (fmt[i] == 'E')
5450 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5451 used |= regs_used (XVECEXP (x, i, j), is_dest);
5453 else if (fmt[i] == 'e')
5454 used |= regs_used (XEXP (x, i), is_dest);
5456 return used;
5459 /* Create an instruction that prevents redirection of a conditional branch
5460 to the destination of the JUMP with address ADDR.
5461 If the branch needs to be implemented as an indirect jump, try to find
5462 a scratch register for it.
5463 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5464 If any preceding insn that doesn't fit into a delay slot is good enough,
5465 pass 1. Pass 2 if a definite blocking insn is needed.
5466 -1 is used internally to avoid deep recursion.
5467 If a blocking instruction is made or recognized, return it. */
/* Summary of the flow below: reuse a suitable insn already in front of JUMP
   if there is one; for a RETURN pattern emit at most a
   block_branch_redirect; otherwise, when optimizing and the destination
   address lies outside the window tested below, look for a dead general
   register and emit an indirect_jump_scratch naming it; failing that, emit
   a block_branch_redirect if NEED_BLOCK is still nonzero.  When nothing is
   emitted, PREV is returned: the previous non-note insn, or JUMP itself
   when that insn is a USE, a CLOBBER, or eligible for a delay slot.  */
5468 static rtx_insn *
5469 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5471 int dead = 0;
5472 rtx_insn *prev = prev_nonnote_insn (jump);
5474 /* First, check if we already have an instruction that satisfies our need. */
5475 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5477 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5478 return prev;
5479 if (GET_CODE (PATTERN (prev)) == USE
5480 || GET_CODE (PATTERN (prev)) == CLOBBER
5481 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5482 prev = jump;
/* Clearing bit 0 turns a request of 1 into 0 and leaves 2 unchanged; the
   internal value -1 becomes -2 and therefore still takes this early
   return.  */
5483 else if ((need_block &= ~1) < 0)
5484 return prev;
5485 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5486 need_block = 0;
5488 if (GET_CODE (PATTERN (jump)) == RETURN)
5490 if (! need_block)
5491 return prev;
5492 /* Reorg even does nasty things with return insns that cause branches
5493 to go out of range - see find_end_label and callers. */
5494 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5496 /* We can't use JUMP_LABEL here because it might be undefined
5497 when not optimizing. */
5498 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5499 /* If the branch is out of range, try to find a scratch register for it. */
/* The unsigned arithmetic folds two comparisons into one: the test is true
   exactly when the displacement (address of DEST minus ADDR) lies outside
   [-4092, 4098].  */
5500 if (optimize
5501 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5502 > 4092 + 4098))
5504 rtx_insn *scan;
5505 /* Don't look for the stack pointer as a scratch register,
5506 it would cause trouble if an interrupt occurred. */
5507 unsigned attempt = 0x7fff, used;
/* Number of simple jumps the forward scan below is allowed to follow.  */
5508 int jump_left = flag_expensive_optimizations + 1;
5510 /* It is likely that the most recent eligible instruction is wanted for
5511 the delay slot. Therefore, find out which registers it uses, and
5512 try to avoid using them. */
5514 for (scan = jump; (scan = PREV_INSN (scan)); )
5516 if (scan->deleted ())
5517 continue;
5518 rtx_code code = GET_CODE (scan);
5519 if (code == CODE_LABEL || code == JUMP_INSN)
5520 break;
5521 if (code == INSN
5522 && GET_CODE (PATTERN (scan)) != USE
5523 && GET_CODE (PATTERN (scan)) != CLOBBER
5524 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5526 attempt &= ~regs_used (PATTERN (scan), 0);
5527 break;
/* Scan forward from the jump target, accumulating the regs_used masks.  */
5530 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5531 (scan = NEXT_INSN (scan)); )
5533 if (scan->deleted ())
5534 continue;
5535 rtx_code code = GET_CODE (scan);
5536 if (INSN_P (scan))
5538 used |= regs_used (PATTERN (scan), 0);
5539 if (code == CALL_INSN)
5540 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
/* A register counts as dead once its output bit (16 + n) has been seen
   while its input bit (n) has not been seen so far on this path.  */
5541 dead |= (used >> 16) & ~used;
5542 if (dead & attempt)
5544 dead &= attempt;
5545 break;
5547 if (code == JUMP_INSN)
5549 if (jump_left-- && simplejump_p (scan))
5550 scan = JUMP_LABEL_AS_INSN (scan);
5551 else
5552 break;
5556 /* Mask out the stack pointer again, in case it was
5557 the only 'free' register we have found. */
5558 dead &= 0x7fff;
5560 /* If the immediate destination is still in range, check for possible
5561 threading with a jump beyond the delay slot insn.
5562 Don't check if we are called recursively; the jump has been or will be
5563 checked in a different invocation then. */
5565 else if (optimize && need_block >= 0)
/* Look at the second active insn after DEST, i.e. the insn beyond what
   would be the delay slot insn mentioned above.  */
5567 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
5568 next = next_active_insn (next);
5569 if (next && JUMP_P (next)
5570 && GET_CODE (PATTERN (next)) == SET
5571 && recog_memoized (next) == CODE_FOR_jump_compact)
5573 dest = JUMP_LABEL (next);
5574 if (dest
5575 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5576 > 4092 + 4098))
5577 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5581 if (dead)
/* dead & -dead isolates the lowest set bit, so the lowest-numbered dead
   register becomes the scratch register.  */
5583 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5585 /* It would be nice if we could convert the jump into an indirect
5586 jump / far branch right now, and thus exposing all constituent
5587 instructions to further optimization. However, reorg uses
5588 simplejump_p to determine if there is an unconditional jump where
5589 it should try to schedule instructions from the target of the
5590 branch; simplejump_p fails for indirect jumps even if they have
5591 a JUMP_LABEL. */
5592 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5593 (reg, GEN_INT (unspec_bbr_uid++)),
5594 jump);
5595 /* ??? We would like this to have the scope of the jump, but that
5596 scope will change when a delay slot insn of an inner scope is added.
5597 Hence, after delay slot scheduling, we'll have to expect
5598 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5599 the jump. */
5601 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5602 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5603 return insn;
5605 else if (need_block)
5606 /* We can't use JUMP_LABEL here because it might be undefined
5607 when not optimizing. */
5608 return emit_insn_before (gen_block_branch_redirect
5609 (GEN_INT (unspec_bbr_uid++)),
5610 jump);
5611 return prev;
/* NOTE(review): presumably the smallest and largest displacement, in bytes,
   that a conditional branch can reach; the users of these macros are
   outside this part of the file - confirm there.  */
5614 #define CONDJUMP_MIN -252
5615 #define CONDJUMP_MAX 262
/* Bookkeeping record handed to gen_far_branch, describing one branch
   around a far jump (or a return).  */
5616 struct far_branch
5618 /* A label (to be placed) in front of the jump
5619 that jumps to our ultimate destination. */
5620 rtx_insn *near_label;
5621 /* Where we are going to insert it if we cannot move the jump any farther,
5622 or the jump itself if we have picked up an existing jump. */
5623 rtx_insn *insert_place;
5624 /* The ultimate destination. */
5625 rtx_insn *far_label;
/* NOTE(review): presumably links to the previously recorded far_branch so
   that the records form a list - confirm against the code that allocates
   them.  */
5626 struct far_branch *prev;
5627 /* If the branch has already been created, its address;
5628 else the address of its first prospective user. */
5629 int address;
/* Current phase of the machine dependent reorg pass.  sh_reorg updates it
   as it moves from one phase to the next, and barrier_align compares it
   against SH_FIXUP_PCLOAD.  */
5632 enum mdep_reorg_phase_e mdep_reorg_phase;
5634 static void
5635 gen_far_branch (struct far_branch *bp)
5637 rtx_insn *insn = bp->insert_place;
5638 rtx_jump_insn *jump;
5639 rtx_code_label *label = gen_label_rtx ();
5641 emit_label_after (label, insn);
5642 if (bp->far_label)
5644 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5645 LABEL_NUSES (bp->far_label)++;
5647 else
5648 jump = emit_jump_insn_after (gen_return (), insn);
5650 /* Emit a barrier so that reorg knows that any following instructions
5651 are not reachable via a fall-through path.
5652 But don't do this when not optimizing, since we wouldn't suppress the
5653 alignment for the barrier then, and could end up with out-of-range
5654 pc-relative loads. */
5655 if (optimize)
5656 emit_barrier_after (jump);
5657 emit_label_after (bp->near_label, insn);
5659 if (bp->far_label)
5660 JUMP_LABEL (jump) = bp->far_label;
5661 else
5663 rtx pat = PATTERN (jump);
5664 gcc_assert (ANY_RETURN_P (pat));
5665 JUMP_LABEL (jump) = pat;
5668 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5669 gcc_assert (ok);
5671 /* If we are branching around a jump (rather than a return), prevent
5672 reorg from using an insn from the jump target as the delay slot insn -
5673 when reorg did this, it pessimized code (we rather hide the delay slot)
5674 and it could cause branches to go out of range. */
5675 if (bp->far_label)
5676 (emit_insn_after
5677 (gen_stuff_delay_slot
5678 (GEN_INT (unspec_bbr_uid++),
5679 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5680 insn));
5681 /* Prevent reorg from undoing our splits. */
5682 gen_block_redirect (jump, bp->address += 2, 2);
5685 /* Fix up ADDR_DIFF_VECs. */
5686 void
5687 fixup_addr_diff_vecs (rtx_insn *first)
5689 rtx_insn *insn;
5691 for (insn = first; insn; insn = NEXT_INSN (insn))
5693 rtx vec_lab, pat, prevpat, x, braf_label;
5694 rtx_insn *prev;
5696 if (! JUMP_TABLE_DATA_P (insn)
5697 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5698 continue;
5699 pat = PATTERN (insn);
5700 vec_lab = XEXP (XEXP (pat, 0), 0);
5702 /* Search the matching casesi_jump_2. */
5703 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5705 if (!JUMP_P (prev))
5706 continue;
5707 prevpat = PATTERN (prev);
5708 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5709 continue;
5710 x = XVECEXP (prevpat, 0, 1);
5711 if (GET_CODE (x) != USE)
5712 continue;
5713 x = XEXP (x, 0);
5714 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5715 break;
5717 /* FIXME: This is a bug in the optimizer, but it seems harmless
5718 to just avoid panicing. */
5719 if (!prev)
5720 continue;
5722 /* Emit the reference label of the braf where it belongs, right after
5723 the casesi_jump_2 (i.e. braf). */
5724 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5725 emit_label_after (as_a <rtx_insn *> (braf_label), prev);
5727 /* Fix up the ADDR_DIF_VEC to be relative
5728 to the reference address of the braf. */
5729 XEXP (XEXP (pat, 0), 0) = braf_label;
5733 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5734 a barrier. Return the base 2 logarithm of the desired alignment. */
/* The decisions below are taken in this order:
   - a null argument yields 0;
   - a label directly followed by jump table data yields 2;
   - a barrier directly preceded by jump table data yields 1 when
     optimizing for size or when the table is small, else the jump
     alignment;
   - no following active insn, an UNSPECV_ALIGN insn following, or
     optimizing for size yields 0;
   - without TARGET_SH2 or without optimization the jump alignment is
     returned;
   - once the reorg phase is past SH_FIXUP_PCLOAD, the heuristic at the end
     may decide on 0;
   - otherwise the jump alignment is returned.  */
5736 barrier_align (rtx_insn *barrier_or_label)
5738 if (! barrier_or_label)
5739 return 0;
5741 if (LABEL_P (barrier_or_label)
5742 && NEXT_INSN (barrier_or_label)
5743 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5744 return 2;
5746 if (BARRIER_P (barrier_or_label)
5747 && PREV_INSN (barrier_or_label)
5748 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5750 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5751 /* If this is a very small table, we want to keep the alignment after
5752 the table to the minimum for proper code alignment. */
/* "Very small" means that the number of entries times the size of the
   table's mode is at most 1 << (CACHE_LOG - 2).  */
5753 return ((optimize_size
5754 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5755 <= (unsigned) 1 << (CACHE_LOG - 2)))
5756 ? 1 : align_jumps.levels[0].log);
5759 rtx_insn *next = next_active_insn (barrier_or_label);
5761 if (! next)
5762 return 0;
5764 rtx pat = PATTERN (next);
5766 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5767 /* This is a barrier in front of a constant table. */
5768 return 0;
5770 if (optimize_size)
5771 return 0;
5773 if (! TARGET_SH2 || ! optimize)
5774 return align_jumps.levels[0].log;
5776 /* When fixing up pcloads, a constant table might be inserted just before
5777 the basic block that ends with the barrier. Thus, we can't trust the
5778 instruction lengths before that. */
5779 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5781 /* Check if there is an immediately preceding branch to the insn beyond
5782 the barrier. We must weight the cost of discarding useful information
5783 from the current cache line when executing this branch and there is
5784 an alignment, against that of fetching unneeded insn in front of the
5785 branch target when there is no alignment. */
5787 /* There are two delay_slot cases to consider. One is the simple case
5788 where the preceding branch is to the insn beyond the barrier (simple
5789 delay slot filling), and the other is where the preceding branch has
5790 a delay slot that is a duplicate of the insn after the barrier
5791 (fill_eager_delay_slots) and the branch is to the insn after the insn
5792 after the barrier. */
/* SLOT starts at 2 and drops to 0 as soon as an insn eligible for a delay
   slot has been seen.  CREDIT starts at (1 << (CACHE_LOG - 2)) + 2 and is
   reduced by the length of every insn walked over.  */
5794 int slot, credit;
5795 bool jump_to_next = false;
5797 /* Skip to the insn before the JUMP_INSN before the barrier under
5798 investigation. */
5799 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
/* Walk backwards over non-jump insns while there is credit left.  */
5801 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5802 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5803 prev = prev_real_insn (prev))
5805 jump_to_next = false;
5806 if (GET_CODE (PATTERN (prev)) == USE
5807 || GET_CODE (PATTERN (prev)) == CLOBBER)
5808 continue;
5809 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
/* For a SEQUENCE, continue with its element 1 in place of PREV.  */
5811 prev = prev_seq->insn (1);
5812 if (INSN_UID (prev) == INSN_UID (next))
5814 /* Delay slot was filled with insn at jump target. */
5815 jump_to_next = true;
5816 continue;
5820 if (slot &&
5821 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5822 slot = 0;
5823 credit -= get_attr_length (prev);
/* The walk stopped at PREV; the alignment is dropped only when PREV is a
   jump to a label that leads to NEXT in one of the ways listed below and
   enough credit is left.  */
5825 if (prev && jump_to_label_p (prev))
5827 rtx_insn *x;
5828 if (jump_to_next
5829 || next_real_insn (JUMP_LABEL_AS_INSN (prev)) == next
5830 /* If relax_delay_slots() decides NEXT was redundant
5831 with some previous instruction, it will have
5832 redirected PREV's jump to the following insn. */
5833 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5834 /* There is no upper bound on redundant instructions
5835 that might have been skipped, but we must not put an
5836 alignment where none had been before. */
5837 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5838 (INSN_P (x)
5839 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5840 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5841 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5843 rtx pat = PATTERN (prev);
5844 if (GET_CODE (pat) == PARALLEL)
5845 pat = XVECEXP (pat, 0, 0);
5846 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5847 return 0;
5852 return align_jumps.levels[0].log;
5855 /* If we are inside a phony loop, almost any kind of label can turn up as the
5856 first one in the loop. Aligning a braf label causes incorrect switch
5857 destination addresses; we can detect braf labels because they are
5858 followed by a BARRIER.
5859 Applying loop alignment to small constant or switch tables is a waste
5860 of space, so we suppress this too. */
5862 sh_loop_align (rtx_insn *label)
5864 rtx_insn *next = label;
5866 if (! optimize || optimize_size)
5867 return 0;
5870 next = next_nonnote_insn (next);
5871 while (next && LABEL_P (next));
5873 if (! next
5874 || ! INSN_P (next)
5875 || recog_memoized (next) == CODE_FOR_consttable_2)
5876 return 0;
5878 return align_loops.levels[0].log;
5881 /* Do a final pass over the function, just before delayed branch
5882 scheduling. */
5883 static void
5884 sh_reorg (void)
5886 rtx_insn *first, *insn, *mova = NULL;
5887 int num_mova;
5888 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5889 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5891 first = get_insns ();
5892 max_labelno_before_reorg = max_label_num ();
5894 /* We must split call insns before introducing `mova's. If we're
5895 optimizing, they'll have already been split. Otherwise, make
5896 sure we don't split them too late. */
5897 if (! optimize)
5898 split_all_insns_noflow ();
5900 /* If relaxing, generate pseudo-ops to associate function calls with
5901 the symbols they call. It does no harm to not generate these
5902 pseudo-ops. However, when we can generate them, it enables the
5903 linker to potentially relax the jsr to a bsr, and eliminate the
5904 register load and, possibly, the constant pool entry. */
5906 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5907 if (TARGET_RELAX)
5909 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5910 own purposes. This works because none of the remaining passes
5911 need to look at them.
5913 ??? But it may break in the future. We should use a machine
5914 dependent REG_NOTE, or some other approach entirely. */
5915 for (insn = first; insn; insn = NEXT_INSN (insn))
5917 if (INSN_P (insn))
5919 rtx note;
5921 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5922 NULL_RTX)) != 0)
5923 remove_note (insn, note);
5927 for (insn = first; insn; insn = NEXT_INSN (insn))
5929 rtx pattern, reg, set, dies;
5930 rtx_code_label *label;
5931 rtx_insn *link, *scan;
5932 int rescan = 0, foundinsn = 0;
5934 if (CALL_P (insn))
5936 pattern = PATTERN (insn);
5938 if (GET_CODE (pattern) == PARALLEL)
5939 pattern = XVECEXP (pattern, 0, 0);
5940 if (GET_CODE (pattern) == SET)
5941 pattern = SET_SRC (pattern);
5943 if (GET_CODE (pattern) != CALL
5944 || !MEM_P (XEXP (pattern, 0)))
5945 continue;
5947 reg = XEXP (XEXP (pattern, 0), 0);
5949 else
5951 reg = sfunc_uses_reg (insn);
5952 if (! reg)
5953 continue;
5956 if (!REG_P (reg))
5957 continue;
5959 /* Try scanning backward to find where the register is set. */
5960 link = NULL;
5961 for (scan = PREV_INSN (insn);
5962 scan && !LABEL_P (scan);
5963 scan = PREV_INSN (scan))
5965 if (! INSN_P (scan))
5966 continue;
5968 if (! reg_mentioned_p (reg, scan))
5969 continue;
5971 if (noncall_uses_reg (reg, scan, &set))
5972 break;
5974 if (set)
5976 link = scan;
5977 break;
5981 if (! link)
5982 continue;
5984 /* The register is set at LINK. */
5986 /* We can only optimize the function call if the register is
5987 being set to a symbol. In theory, we could sometimes
5988 optimize calls to a constant location, but the assembler
5989 and linker do not support that at present. */
5990 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5991 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5992 continue;
5994 /* Scan forward from LINK to the place where REG dies, and
5995 make sure that the only insns which use REG are
5996 themselves function calls. */
5998 /* ??? This doesn't work for call targets that were allocated
5999 by reload, since there may not be a REG_DEAD note for the
6000 register. */
6002 dies = NULL_RTX;
6003 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6005 rtx scanset;
6007 /* Don't try to trace forward past a CODE_LABEL if we haven't
6008 seen INSN yet. Ordinarily, we will only find the setting insn
6009 if it is in the same basic block. However,
6010 cross-jumping can insert code labels in between the load and
6011 the call, and can result in situations where a single call
6012 insn may have two targets depending on where we came from. */
6014 if (LABEL_P (scan) && ! foundinsn)
6015 break;
6017 if (! INSN_P (scan))
6018 continue;
6020 /* Don't try to trace forward past a JUMP. To optimize
6021 safely, we would have to check that all the
6022 instructions at the jump destination did not use REG. */
6024 if (JUMP_P (scan))
6025 break;
6027 if (! reg_mentioned_p (reg, scan))
6028 continue;
6030 if (noncall_uses_reg (reg, scan, &scanset))
6031 break;
6033 if (scan == insn)
6034 foundinsn = 1;
6036 if (scan != insn
6037 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6039 /* There is a function call to this register other
6040 than the one we are checking. If we optimize
6041 this call, we need to rescan again below. */
6042 rescan = 1;
6045 /* ??? We shouldn't have to worry about SCANSET here.
6046 We should just be able to check for a REG_DEAD note
6047 on a function call. However, the REG_DEAD notes are
6048 apparently not dependable around libcalls; c-torture
6049 execute/920501-2 is a test case. If SCANSET is set,
6050 then this insn sets the register, so it must have
6051 died earlier. Unfortunately, this will only handle
6052 the cases in which the register is, in fact, set in a
6053 later insn. */
6055 /* ??? We shouldn't have to use FOUNDINSN here.
6056 This dates back to when we used LOG_LINKS to find
6057 the most recent insn which sets the register. */
6059 if (foundinsn
6060 && (scanset
6061 || find_reg_note (scan, REG_DEAD, reg)))
6063 dies = scan;
6064 break;
6068 if (! dies)
6070 /* Either there was a branch, or some insn used REG
6071 other than as a function call address. */
6072 continue;
6075 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6076 on the insn which sets the register, and on each call insn
6077 which uses the register. In final_prescan_insn we look for
6078 the REG_LABEL_OPERAND notes, and output the appropriate label
6079 or pseudo-op. */
6081 label = gen_label_rtx ();
6082 add_reg_note (link, REG_LABEL_OPERAND, label);
6083 add_reg_note (insn, REG_LABEL_OPERAND, label);
6084 if (rescan)
6086 scan = link;
6089 rtx reg2;
6091 scan = NEXT_INSN (scan);
6092 if (scan != insn
6093 && ((CALL_P (scan)
6094 && reg_mentioned_p (reg, scan))
6095 || ((reg2 = sfunc_uses_reg (scan))
6096 && REGNO (reg2) == REGNO (reg))))
6097 add_reg_note (scan, REG_LABEL_OPERAND, label);
6099 while (scan != dies);
6104 if (TARGET_SH2)
6105 fixup_addr_diff_vecs (first);
6107 if (optimize)
6109 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6110 shorten_branches (first);
6113 /* Scan the function looking for move instructions which have to be
6114 changed to pc-relative loads and insert the literal tables. */
6115 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6116 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6118 if (mova_p (insn))
6120 /* ??? basic block reordering can move a switch table dispatch
6121 below the switch table. Check if that has happened.
6122 We only have the addresses available when optimizing; but then,
6123 this check shouldn't be needed when not optimizing. */
6124 if (!untangle_mova (&num_mova, &mova, insn))
6126 insn = mova;
6127 num_mova = 0;
6130 else if (JUMP_TABLE_DATA_P (insn)
6131 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6132 && num_mova
6133 /* ??? loop invariant motion can also move a mova out of a
6134 loop. Since loop does this code motion anyway, maybe we
6135 should wrap UNSPEC_MOVA into a CONST, so that reload can
6136 move it back. */
6137 && ((num_mova > 1
6138 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6139 || (prev_nonnote_insn (insn)
6140 == XEXP (MOVA_LABELREF (mova), 0))))
6142 rtx_insn *scan;
6143 int total;
6145 num_mova--;
6147 /* Some code might have been inserted between the mova and
6148 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6149 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6150 total += get_attr_length (scan);
6152 /* range of mova is 1020, add 4 because pc counts from address of
6153 second instruction after this one, subtract 2 in case pc is 2
6154 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6155 cancels out with alignment effects of the mova itself. */
6156 if (total > 1022)
6158 /* Change the mova into a load, and restart scanning
6159 there. broken_move will then return true for mova. */
6160 fixup_mova (mova);
6161 insn = mova;
6164 if (broken_move (insn)
6165 || (NONJUMP_INSN_P (insn)
6166 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6168 rtx_insn *scan;
6169 /* Scan ahead looking for a barrier to stick the constant table
6170 behind. */
6171 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6172 rtx_insn *last_float_move = NULL;
6173 rtx last_float = 0, *last_float_addr = NULL;
6174 int need_aligned_label = 0;
6176 if (num_mova && ! mova_p (mova))
6178 /* find_barrier had to change the first mova into a
6179 pcload; thus, we have to start with this new pcload. */
6180 insn = mova;
6181 num_mova = 0;
6183 /* Now find all the moves between the points and modify them. */
6184 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6186 if (LABEL_P (scan))
6187 last_float = 0;
6188 if (NONJUMP_INSN_P (scan)
6189 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6190 need_aligned_label = 1;
6191 if (broken_move (scan))
6193 rtx *patp = &PATTERN (scan), pat = *patp;
6194 rtx src, dst;
6195 rtx lab;
6196 rtx newsrc;
6197 machine_mode mode;
6199 if (GET_CODE (pat) == PARALLEL)
6200 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6201 src = SET_SRC (pat);
6202 dst = SET_DEST (pat);
6203 mode = GET_MODE (dst);
6205 if (mode == SImode && satisfies_constraint_I16 (src)
6206 && REGNO (dst) != FPUL_REG)
6208 int offset = 0;
6210 mode = HImode;
6211 while (GET_CODE (dst) == SUBREG)
6213 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6214 GET_MODE (SUBREG_REG (dst)),
6215 SUBREG_BYTE (dst),
6216 GET_MODE (dst));
6217 dst = SUBREG_REG (dst);
6219 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6221 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6223 /* This must be an insn that clobbers r0. */
6224 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6225 XVECLEN (PATTERN (scan), 0)
6226 - 1);
6227 rtx clobber = *clobberp;
6229 gcc_assert (GET_CODE (clobber) == CLOBBER
6230 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6232 if (last_float
6233 && reg_set_between_p (r0_rtx, last_float_move, scan))
6234 last_float = 0;
6235 lab = add_constant (src, mode, last_float);
6236 if (lab)
6237 emit_insn_before (gen_mova (lab), scan);
6238 else
6240 /* There will be a REG_UNUSED note for r0 on
6241 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6242 lest reorg:mark_target_live_regs will not
6243 consider r0 to be used, and we end up with delay
6244 slot insn in front of SCAN that clobbers r0. */
6245 rtx note
6246 = find_regno_note (last_float_move, REG_UNUSED, 0);
6248 /* If we are not optimizing, then there may not be
6249 a note. */
6250 if (note)
6251 PUT_REG_NOTE_KIND (note, REG_INC);
6253 *last_float_addr = r0_inc_rtx;
6255 last_float_move = scan;
6256 last_float = src;
6257 newsrc = gen_const_mem (mode,
6258 (((TARGET_SH4 && ! TARGET_FMOVD)
6259 || REGNO (dst) == FPUL_REG)
6260 ? r0_inc_rtx
6261 : r0_rtx));
6262 last_float_addr = &XEXP (newsrc, 0);
6264 /* Remove the clobber of r0. */
6265 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6266 gen_rtx_SCRATCH (Pmode));
6268 /* This is a mova needing a label. Create it. */
6269 else if (GET_CODE (src) == UNSPEC
6270 && XINT (src, 1) == UNSPEC_MOVA
6271 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6273 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6274 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6275 newsrc = gen_rtx_UNSPEC (SImode,
6276 gen_rtvec (1, newsrc),
6277 UNSPEC_MOVA);
6279 else if (GET_CODE (src) == UNSPEC_VOLATILE
6280 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6282 newsrc = XVECEXP (src, 0, 0);
6283 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6284 INSN_CODE (scan) = -1;
6285 continue;
6287 else
6289 lab = add_constant (src, mode, 0);
6290 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6291 newsrc = gen_const_mem (mode, newsrc);
6293 *patp = gen_rtx_SET (dst, newsrc);
6294 INSN_CODE (scan) = -1;
6297 dump_table (need_aligned_label ? insn : 0, barrier);
6298 insn = barrier;
6301 label_ref_list_d_pool.release ();
6302 for (insn = first; insn; insn = NEXT_INSN (insn))
6303 PUT_MODE (insn, VOIDmode);
6305 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6306 INSN_ADDRESSES_FREE ();
6307 split_branches (first);
6309 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6310 also has an effect on the register that holds the address of the sfunc.
6311 Insert an extra dummy insn in front of each sfunc that pretends to
6312 use this register. */
6313 if (flag_delayed_branch)
6315 for (insn = first; insn; insn = NEXT_INSN (insn))
6317 rtx reg = sfunc_uses_reg (insn);
6319 if (! reg)
6320 continue;
6321 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6324 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6327 /* Return the UID of the insn that follows the specified label. */
6329 get_dest_uid (rtx_insn *label, int max_uid)
6331 rtx_insn *dest = next_real_insn (label);
6333 if (! dest)
6334 /* This can happen for an undefined label. */
6335 return 0;
6336 int dest_uid = INSN_UID (dest);
6337 /* If this is a newly created branch redirection blocking instruction,
6338 we cannot index the branch_uid or insn_addresses arrays with its
6339 uid. But then, we won't need to, because the actual destination is
6340 the following branch. */
6341 while (dest_uid >= max_uid)
6343 dest = NEXT_INSN (dest);
6344 dest_uid = INSN_UID (dest);
6346 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6347 return 0;
6348 return dest_uid;
6351 /* Split condbranches that are out of range. Also add clobbers for
6352 scratch registers that are needed in far jumps.
6353 We do this before delay slot scheduling, so that it can take our
6354 newly created instructions into account. It also allows us to
6355 find branches with common targets more easily. */
6356 static void
6357 split_branches (rtx_insn *first)
6359 rtx_insn *insn;
6360 struct far_branch **uid_branch, *far_branch_list = 0;
6361 int max_uid = get_max_uid ();
6362 int ok;
6364 /* Find out which branches are out of range. */
6365 shorten_branches (first);
6367 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6368 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6370 for (insn = first; insn; insn = NEXT_INSN (insn))
6371 if (! INSN_P (insn))
6372 continue;
6373 else if (insn->deleted ())
6375 /* Shorten_branches would split this instruction again,
6376 so transform it into a note. */
6377 SET_INSN_DELETED (insn);
6379 else if (JUMP_P (insn))
6381 enum attr_type type = get_attr_type (insn);
6382 if (type == TYPE_CBRANCH)
6384 rtx_insn *next, *beyond;
6386 if (get_attr_length (insn) > 4)
6388 rtx src = SET_SRC (PATTERN (insn));
6389 rtx_insn *olabel = safe_as_a <rtx_insn *> (XEXP (XEXP (src, 1), 0));
6390 int addr = INSN_ADDRESSES (INSN_UID (insn));
6391 rtx_insn *label = 0;
6392 int dest_uid = get_dest_uid (olabel, max_uid);
6393 struct far_branch *bp = uid_branch[dest_uid];
6395 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6396 the label if the LABEL_NUSES count drops to zero. There is
6397 always a jump_optimize pass that sets these values, but it
6398 proceeds to delete unreferenced code, and then if not
6399 optimizing, to un-delete the deleted instructions, thus
6400 leaving labels with too low uses counts. */
6401 if (! optimize)
6403 JUMP_LABEL (insn) = olabel;
6404 LABEL_NUSES (olabel)++;
6406 if (! bp)
6408 bp = (struct far_branch *) alloca (sizeof *bp);
6409 uid_branch[dest_uid] = bp;
6410 bp->prev = far_branch_list;
6411 far_branch_list = bp;
6412 bp->far_label = as_a <rtx_insn *> (
6413 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6414 0));
6415 LABEL_NUSES (bp->far_label)++;
6417 else
6419 label = bp->near_label;
6420 if (! label && bp->address - addr >= CONDJUMP_MIN)
6422 rtx_insn *block = bp->insert_place;
6424 if (GET_CODE (PATTERN (block)) == RETURN)
6425 block = PREV_INSN (block);
6426 else
6427 block = gen_block_redirect (block,
6428 bp->address, 2);
6429 label = emit_label_after (gen_label_rtx (),
6430 PREV_INSN (block));
6431 bp->near_label = label;
6433 else if (label && ! NEXT_INSN (label))
6435 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6436 bp->insert_place = insn;
6437 else
6438 gen_far_branch (bp);
6441 if (! label
6442 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6444 bp->near_label = label = gen_label_rtx ();
6445 bp->insert_place = insn;
6446 bp->address = addr;
6448 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6449 gcc_assert (ok);
6451 else
6453 /* get_attr_length (insn) == 2 */
6454 /* Check if we have a pattern where reorg wants to redirect
6455 the branch to a label from an unconditional branch that
6456 is too far away. */
6457 /* We can't use JUMP_LABEL here because it might be undefined
6458 when not optimizing. */
6459 /* A syntax error might cause beyond to be NULL_RTX. */
6460 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6461 beyond = next_active_insn (as_a<rtx_insn *> (temp));
6463 if (beyond
6464 && (JUMP_P (beyond)
6465 || ((beyond = next_active_insn (beyond))
6466 && JUMP_P (beyond)))
6467 && GET_CODE (PATTERN (beyond)) == SET
6468 && recog_memoized (beyond) == CODE_FOR_jump_compact
6469 && ((INSN_ADDRESSES
6470 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6471 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6472 > 252 + 258 + 2))
6473 gen_block_redirect (beyond,
6474 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6477 next = next_active_insn (insn);
6479 if (next
6480 && (JUMP_P (next)
6481 || ((next = next_active_insn (next))
6482 && JUMP_P (next)))
6483 && GET_CODE (PATTERN (next)) == SET
6484 && recog_memoized (next) == CODE_FOR_jump_compact
6485 && ((INSN_ADDRESSES
6486 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6487 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6488 > 252 + 258 + 2))
6489 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6491 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6493 int addr = INSN_ADDRESSES (INSN_UID (insn));
6494 rtx_insn *far_label = 0;
6495 int dest_uid = 0;
6496 struct far_branch *bp;
6498 if (type == TYPE_JUMP)
6500 if (CROSSING_JUMP_P (insn))
6502 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6503 insn);
6504 continue;
6507 far_label = as_a <rtx_insn *> (
6508 XEXP (SET_SRC (PATTERN (insn)), 0));
6509 dest_uid = get_dest_uid (far_label, max_uid);
6510 if (! dest_uid)
6512 /* Parse errors can lead to labels outside
6513 the insn stream. */
6514 if (! NEXT_INSN (far_label))
6515 continue;
6517 if (! optimize)
6519 JUMP_LABEL (insn) = far_label;
6520 LABEL_NUSES (far_label)++;
6522 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6523 far_label = 0;
6526 bp = uid_branch[dest_uid];
6527 if (! bp)
6529 bp = (struct far_branch *) alloca (sizeof *bp);
6530 uid_branch[dest_uid] = bp;
6531 bp->prev = far_branch_list;
6532 far_branch_list = bp;
6533 bp->near_label = 0;
6534 bp->far_label = far_label;
6535 if (far_label)
6536 LABEL_NUSES (far_label)++;
6538 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6539 if (addr - bp->address <= CONDJUMP_MAX)
6540 emit_label_after (bp->near_label, PREV_INSN (insn));
6541 else
6543 gen_far_branch (bp);
6544 bp->near_label = 0;
6546 else
6547 bp->near_label = 0;
6548 bp->address = addr;
6549 bp->insert_place = insn;
6550 if (! far_label)
6551 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6552 else
6553 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6556 /* Generate all pending far branches,
6557 and free our references to the far labels. */
6558 while (far_branch_list)
6560 if (far_branch_list->near_label
6561 && ! NEXT_INSN (far_branch_list->near_label))
6562 gen_far_branch (far_branch_list);
6563 if (optimize
6564 && far_branch_list->far_label
6565 && ! --LABEL_NUSES (far_branch_list->far_label))
6566 delete_insn (far_branch_list->far_label);
6567 far_branch_list = far_branch_list->prev;
6570 /* Instruction length information is no longer valid due to the new
6571 instructions that have been generated. */
6572 init_insn_lengths ();
6575 /* Dump out instruction addresses, which is useful for debugging the
6576 constant pool table stuff.
6578 If relaxing, output the label and pseudo-ops used to link together
6579 calls and the instruction which set the registers.
6581 ??? The addresses printed by this routine for insns are nonsense for
6582 insns which are inside of a sequence where none of the inner insns have
6583 variable length. This is because the second pass of shorten_branches
6584 does not bother to update them. */
6585 void
6586 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6587 int noperands ATTRIBUTE_UNUSED)
6589 if (TARGET_DUMPISIZE)
6590 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6592 if (TARGET_RELAX)
6594 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6596 rtx pattern = PATTERN (insn);
6597 if (GET_CODE (pattern) == PARALLEL)
6598 pattern = XVECEXP (pattern, 0, 0);
6599 switch (GET_CODE (pattern))
6601 case SET:
6602 if (GET_CODE (SET_SRC (pattern)) != CALL
6603 && get_attr_type (insn) != TYPE_SFUNC)
6605 targetm.asm_out.internal_label
6606 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6607 break;
6609 /* FALLTHROUGH */
6610 case CALL:
6611 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6612 CODE_LABEL_NUMBER (XEXP (note, 0)));
6613 break;
6615 default:
6616 gcc_unreachable ();
6622 /* Dump out any constants accumulated in the final pass. These will
6623 only be labels. */
6624 const char *
6625 output_jump_label_table (void)
6627 if (pool_size)
6629 fprintf (asm_out_file, "\t.align 2\n");
6630 for (int i = 0; i < pool_size; i++)
6632 pool_node *p = &pool_vector[i];
6634 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6635 CODE_LABEL_NUMBER (p->label));
6636 output_asm_insn (".long %O0", &p->value);
6638 pool_size = 0;
6641 return "";
6644 /* A full frame looks like:
6646 arg-5
6647 arg-4
6648 [ if current_function_anonymous_args
6649 arg-3
6650 arg-2
6651 arg-1
6652 arg-0 ]
6653 saved-fp
6654 saved-r10
6655 saved-r11
6656 saved-r12
6657 saved-pr
6658 local-n
6660 local-1
6661 local-0 <- fp points here.
6663 Number of bytes pushed for anonymous args, used to pass information
6664 between expand_prologue and expand_epilogue.
6666 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6667 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6668 for an epilogue and a negative value means that it's for a sibcall
6669 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6670 all the registers that are about to be restored, and hence dead. */
6671 static void
6672 output_stack_adjust (int size, rtx reg, int epilogue_p,
6673 HARD_REG_SET *live_regs_mask, bool frame_p)
6675 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6676 if (size)
6678 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6680 /* This test is bogus, as output_stack_adjust is used to re-align the
6681 stack. */
6682 #if 0
6683 gcc_assert (!(size % align));
6684 #endif
6686 if (CONST_OK_FOR_ADD (size))
6687 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6688 /* Try to do it with two partial adjustments; however, we must make
6689 sure that the stack is properly aligned at all times, in case
6690 an interrupt occurs between the two partial adjustments. */
6691 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6692 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6694 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6695 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6697 else
6699 rtx const_reg;
6700 rtx insn;
6701 int temp = epilogue_p ? 7 : 1;
6702 int i;
6704 /* If TEMP is invalid, we could temporarily save a general
6705 register to MACL. However, there is currently no need
6706 to handle this case, so just die when we see it. */
6707 if (epilogue_p < 0
6708 || current_function_interrupt
6709 || ! call_really_used_regs[temp] || fixed_regs[temp])
6710 temp = -1;
6711 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6713 HARD_REG_SET temps;
6714 COPY_HARD_REG_SET (temps, call_used_reg_set);
6715 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6716 if (epilogue_p > 0)
6718 int nreg = 0;
6719 if (crtl->return_rtx)
6721 machine_mode mode;
6722 mode = GET_MODE (crtl->return_rtx);
6723 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6724 nreg = hard_regno_nregs (FIRST_RET_REG, mode);
6726 for (i = 0; i < nreg; i++)
6727 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6728 if (crtl->calls_eh_return)
6730 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6731 for (i = 0; i <= 3; i++)
6732 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6735 if (epilogue_p <= 0)
6737 for (i = FIRST_PARM_REG;
6738 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6739 CLEAR_HARD_REG_BIT (temps, i);
6740 if (cfun->static_chain_decl != NULL)
6741 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6743 temp = scavenge_reg (&temps);
6745 if (temp < 0 && live_regs_mask)
6747 HARD_REG_SET temps;
6749 COPY_HARD_REG_SET (temps, *live_regs_mask);
6750 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6751 temp = scavenge_reg (&temps);
6753 if (temp < 0)
6755 rtx adj_reg, tmp_reg, mem;
6757 /* If we reached here, the most likely case is the (sibcall)
6758 epilogue. Put a special push/pop sequence for such case as
6759 the last resort. This looks lengthy but would not be problem
6760 because it seems to be very rare. */
6761 gcc_assert (epilogue_p);
6763 /* ??? There is still the slight possibility that r4 or
6764 r5 have been reserved as fixed registers or assigned
6765 as global registers, and they change during an
6766 interrupt. There are possible ways to handle this:
6768 - If we are adjusting the frame pointer (r14), we can do
6769 with a single temp register and an ordinary push / pop
6770 on the stack.
6771 - Grab any call-used or call-saved registers (i.e. not
6772 fixed or globals) for the temps we need. We might
6773 also grab r14 if we are adjusting the stack pointer.
6774 If we can't find enough available registers, issue
6775 a diagnostic and die - the user must have reserved
6776 way too many registers.
6777 But since all this is rather unlikely to happen and
6778 would require extra testing, we just die if r4 / r5
6779 are not available. */
6780 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6781 && !global_regs[4] && !global_regs[5]);
6783 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6784 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6785 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6786 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6787 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6788 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6789 emit_move_insn (mem, tmp_reg);
6790 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6791 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6792 emit_move_insn (mem, tmp_reg);
6793 emit_move_insn (reg, adj_reg);
6794 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6795 emit_move_insn (adj_reg, mem);
6796 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6797 emit_move_insn (tmp_reg, mem);
6798 /* Tell flow the insns that pop r4/r5 aren't dead. */
6799 emit_use (tmp_reg);
6800 emit_use (adj_reg);
6801 return;
6803 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6805 /* If SIZE is negative, subtract the positive value.
6806 This sometimes allows a constant pool entry to be shared
6807 between prologue and epilogue code. */
6808 if (size < 0)
6810 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6811 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6813 else
6815 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6816 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6818 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6819 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6820 GEN_INT (size))));
6825 /* Emit the specified insn and mark it as frame related. */
6826 static rtx_insn *
6827 emit_frame_insn (rtx x)
6829 rtx_insn *insn = emit_insn (x);
6830 RTX_FRAME_RELATED_P (insn) = 1;
6831 return insn;
6834 /* Output RTL to push register RN onto the stack. */
6835 static rtx
6836 push (int rn)
6838 rtx x;
6839 if (rn == FPUL_REG)
6840 x = gen_push_fpul ();
6841 else if (rn == FPSCR_REG)
6842 x = gen_push_fpscr ();
6843 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6844 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6846 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6847 return NULL_RTX;
6848 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6850 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6851 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6852 else
6853 x = gen_push (gen_rtx_REG (SImode, rn));
6855 x = emit_frame_insn (x);
6856 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6857 return x;
6860 /* Output RTL to pop register RN from the stack. */
6861 static void
6862 pop (int rn)
6864 rtx x, sp_reg, reg;
6865 if (rn == FPUL_REG)
6866 x = gen_pop_fpul ();
6867 else if (rn == FPSCR_REG)
6868 x = gen_pop_fpscr ();
6869 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6870 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6872 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6873 return;
6874 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6876 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6877 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6878 else
6879 x = gen_pop (gen_rtx_REG (SImode, rn));
6881 x = emit_insn (x);
6883 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6884 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6885 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6886 : SET_DEST (PATTERN (x)));
6887 add_reg_note (x, REG_CFA_RESTORE, reg);
6888 add_reg_note (x, REG_CFA_ADJUST_CFA,
6889 gen_rtx_SET (sp_reg,
6890 plus_constant (SImode, sp_reg,
6891 GET_MODE_SIZE (GET_MODE (reg)))));
6892 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6893 RTX_FRAME_RELATED_P (x) = 1;
6896 /* Generate code to push the regs specified in the mask. */
6897 static void
6898 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6900 bool skip_fpscr = false;
6902 /* Push PR last; this gives better latencies after the prologue, and
6903 candidates for the return delay slot when there are no general
6904 registers pushed. */
6905 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6906 i < FIRST_PSEUDO_REGISTER; i++)
6908 /* If this is an interrupt handler, and the SZ bit varies,
6909 and we have to push any floating point register, we need
6910 to switch to the correct precision first. */
6911 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6912 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6914 HARD_REG_SET unsaved;
6916 push (FPSCR_REG);
6917 COMPL_HARD_REG_SET (unsaved, *mask);
6918 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6919 skip_fpscr = true;
6921 if (i != PR_REG
6922 && (i != FPSCR_REG || ! skip_fpscr)
6923 && TEST_HARD_REG_BIT (*mask, i))
6925 /* If the ISR has RESBANK attribute assigned, don't push any of
6926 the following registers - R0-R14, MACH, MACL and GBR. */
6927 if (! (sh_cfun_resbank_handler_p ()
6928 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6929 || i == MACH_REG
6930 || i == MACL_REG
6931 || i == GBR_REG)))
6932 push (i);
6936 /* Push banked registers last to improve delay slot opportunities. */
6937 if (interrupt_handler)
6939 bool use_movml = false;
6941 if (TARGET_SH2A)
6943 unsigned int count = 0;
6945 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6946 if (TEST_HARD_REG_BIT (*mask, i))
6947 count++;
6948 else
6949 break;
6951 /* Use movml when all banked registers are pushed. */
6952 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6953 use_movml = true;
6956 if (sh_cfun_resbank_handler_p ())
6957 ; /* Do nothing. */
6958 else if (use_movml)
6960 rtx x, mem, reg, set;
6961 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6963 /* We must avoid scheduling multiple store insn with another
6964 insns. */
6965 emit_insn (gen_blockage ());
6966 x = gen_movml_push_banked (sp_reg);
6967 x = emit_frame_insn (x);
6968 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6970 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6971 reg = gen_rtx_REG (SImode, i);
6972 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6975 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6976 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6977 emit_insn (gen_blockage ());
6979 else
6980 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6981 if (TEST_HARD_REG_BIT (*mask, i))
6982 push (i);
6985 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6986 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6987 push (PR_REG);
6990 /* Work out the registers which need to be saved, both as a mask and a
6991 count of saved words. Return the count.
6993 If doing a pragma interrupt function, then push all regs used by the
6994 function, and if we call another function (we can tell by looking at PR),
6995 make sure that all the regs it clobbers are safe too. */
6996 static int
6997 calc_live_regs (HARD_REG_SET *live_regs_mask)
6999 unsigned int reg;
7000 tree attrs;
7001 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7002 bool nosave_low_regs;
7004 attrs = DECL_ATTRIBUTES (current_function_decl);
7005 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7006 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7007 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7008 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7010 CLEAR_HARD_REG_SET (*live_regs_mask);
7011 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
7012 && df_regs_ever_live_p (FPSCR_REG))
7013 target_flags &= ~MASK_FPU_SINGLE;
7014 /* If we can save a lot of saves by switching to double mode, do that. */
7015 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
7016 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7017 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7018 && (! call_really_used_regs[reg]
7019 || interrupt_handler)
7020 && ++count > 2)
7022 target_flags &= ~MASK_FPU_SINGLE;
7023 break;
7027 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7028 bool pr_live = (pr_initial
7029 ? (!REG_P (pr_initial)
7030 || REGNO (pr_initial) != (PR_REG))
7031 : df_regs_ever_live_p (PR_REG));
7032 /* For Shcompact, if not optimizing, we end up with a memory reference
7033 using the return address pointer for __builtin_return_address even
7034 though there is no actual need to put the PR register on the stack. */
7035 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7037 /* Force PR to be live if the prologue has to call the SHmedia
7038 argument decoder or register saver. */
7039 bool has_call = pr_live;
7041 int count;
7042 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7044 if (reg == PR_REG
7045 ? pr_live
7046 : interrupt_handler
7047 ? (/* Need to save all the regs ever live. */
7048 (df_regs_ever_live_p (reg)
7049 || (call_really_used_regs[reg]
7050 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7051 || reg == PIC_OFFSET_TABLE_REGNUM)
7052 && has_call))
7053 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7054 && reg != RETURN_ADDRESS_POINTER_REGNUM
7055 && reg != T_REG && reg != GBR_REG
7056 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7057 /* Push fpscr only on targets which have FPU */
7058 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7059 : (/* Only push those regs which are used and need to be saved. */
7060 (false)
7061 || (df_regs_ever_live_p (reg)
7062 && ((!call_really_used_regs[reg]
7063 && !(reg != PIC_OFFSET_TABLE_REGNUM
7064 && fixed_regs[reg] && call_used_regs[reg]))
7065 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7066 || (crtl->calls_eh_return
7067 && (reg == EH_RETURN_DATA_REGNO (0)
7068 || reg == EH_RETURN_DATA_REGNO (1)
7069 || reg == EH_RETURN_DATA_REGNO (2)
7070 || reg == EH_RETURN_DATA_REGNO (3)))
7071 || ((reg == MACL_REG || reg == MACH_REG)
7072 && df_regs_ever_live_p (reg)
7073 && sh_cfun_attr_renesas_p ())
7076 SET_HARD_REG_BIT (*live_regs_mask, reg);
7077 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7079 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7080 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7082 if (FP_REGISTER_P (reg))
7084 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7086 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7087 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7090 else if (XD_REGISTER_P (reg))
7092 /* Must switch to double mode to access these registers. */
7093 target_flags &= ~MASK_FPU_SINGLE;
7097 if (nosave_low_regs && reg == R8_REG)
7098 break;
7101 return count;
7104 /* Code to generate prologue and epilogue sequences */
7106 /* PUSHED is the number of bytes that are being pushed on the
7107 stack for register saves. Return the frame size, padded
7108 appropriately so that the stack stays properly aligned. */
7109 static HOST_WIDE_INT
7110 rounded_frame_size (int pushed)
7112 HOST_WIDE_INT size = get_frame_size ();
7113 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7115 if (ACCUMULATE_OUTGOING_ARGS)
7116 size += crtl->outgoing_args_size;
7118 return ((size + pushed + align - 1) & -align) - pushed;
7121 /* Expand code for the function prologue. */
7122 void
7123 sh_expand_prologue (void)
7125 int save_flags = target_flags;
7126 tree sp_switch_attr
7127 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7129 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7131 /* We have pretend args if we had an object sent partially in registers
7132 and partially on the stack, e.g. a large structure. */
7133 int pretend_args = crtl->args.pretend_args_size;
7134 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7135 && (NPARM_REGS(SImode)
7136 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7137 pretend_args = 0;
7139 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7140 int stack_usage = pretend_args;
7142 /* Emit the code for SETUP_VARARGS. */
7143 if (cfun->stdarg)
7145 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7147 /* Push arg regs as if they'd been provided by caller in stack. */
7148 for (int i = 0; i < NPARM_REGS(SImode); i++)
7150 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7152 if (i >= (NPARM_REGS(SImode)
7153 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7155 break;
7156 push (rn);
7157 stack_usage += GET_MODE_SIZE (SImode);
7162 /* If we're supposed to switch stacks at function entry, do so now. */
7163 if (sp_switch_attr)
7165 rtx lab, newsrc;
7166 /* The argument specifies a variable holding the address of the
7167 stack the interrupt function should switch to/from at entry/exit. */
7168 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7169 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7170 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7172 lab = add_constant (sp_switch, SImode, 0);
7173 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7175 emit_insn (gen_sp_switch_1 (newsrc));
7178 HARD_REG_SET live_regs_mask;
7179 int d = calc_live_regs (&live_regs_mask);
7180 /* ??? Maybe we could save some switching if we can move a mode switch
7181 that already happens to be at the function start into the prologue. */
7182 if (target_flags != save_flags && ! current_function_interrupt)
7183 emit_insn (gen_toggle_sz ());
7185 push_regs (&live_regs_mask, current_function_interrupt);
7186 stack_usage += d;
7188 if (flag_pic && !TARGET_FDPIC
7189 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7190 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7192 if (target_flags != save_flags && ! current_function_interrupt)
7193 emit_insn (gen_toggle_sz ());
7195 target_flags = save_flags;
7197 output_stack_adjust (-rounded_frame_size (d),
7198 stack_pointer_rtx, 0, NULL, true);
7199 stack_usage += rounded_frame_size (d);
7201 if (frame_pointer_needed)
7202 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7204 /* If we are profiling, make sure no instructions are scheduled before
7205 the call to mcount. Similarly if some call instructions are swapped
7206 before frame related insns, it'll confuse the unwinder because
7207 currently SH has no unwind info for function epilogues. */
7208 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7209 emit_insn (gen_blockage ());
7211 if (flag_stack_usage_info)
7212 current_function_static_stack_size = stack_usage;
7215 /* Expand code for the function epilogue.
   SIBCALL_P only selects the value of E (-1 for a sibcall epilogue,
   1 otherwise), which is passed on to every output_stack_adjust call.
   The sequence emitted is: release the local frame, pop the saved
   registers, release the pretend-args area, apply the EH return stack
   adjustment if any, and switch back from an "sp_switch" stack.  */
7216 void
7217 sh_expand_epilogue (bool sibcall_p)
/* Remember target_flags as they were on entry; they are compared against
   below and restored once the registers have been popped.  */
7219 int save_flags = target_flags;
7220 bool fpscr_deferred = false;
7221 int e = sibcall_p ? -1 : 1;
7223 HARD_REG_SET live_regs_mask;
7224 int d = calc_live_regs (&live_regs_mask);
7226 int save_size = d;
7227 int frame_size = rounded_frame_size (d);
7229 if (frame_pointer_needed)
7231 /* We must avoid scheduling the epilogue with previous basic blocks.
7232 See PR/18032 and PR/40313. */
7233 emit_insn (gen_blockage ());
7234 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7235 &live_regs_mask, true);
7237 /* We must avoid moving the stack pointer adjustment past code
7238 which reads from the local frame, else an interrupt could
7239 occur after the SP adjustment and clobber data in the local
7240 frame. */
7241 emit_insn (gen_blockage ());
7242 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7244 else if (frame_size)
7246 /* We must avoid moving the stack pointer adjustment past code
7247 which reads from the local frame, else an interrupt could
7248 occur after the SP adjustment and clobber data in the local
7249 frame. */
7250 emit_insn (gen_blockage ());
7251 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7252 &live_regs_mask, true);
7255 /* Pop all the registers. */
/* target_flags differing from the entry value means one of the calls
   above changed them (presumably calc_live_regs -- confirm); outside of
   interrupt handlers the pops are then bracketed by toggle_sz insns.  */
7257 if (target_flags != save_flags && ! current_function_interrupt)
7258 emit_insn (gen_toggle_sz ());
7261 int last_reg;
7263 save_size = 0;
7264 /* For an ISR with RESBANK attribute assigned, don't pop PR
7265 register. */
7266 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7267 && !sh_cfun_resbank_handler_p ())
7269 if (!frame_pointer_needed)
7270 emit_insn (gen_blockage ());
7271 pop (PR_REG);
7274 /* Banked registers are popped first to avoid being scheduled in the
7275 delay slot. RTE switches banks before the ds instruction. */
7276 if (current_function_interrupt)
7278 bool use_movml = false;
7280 if (TARGET_SH2A)
7282 unsigned int count = 0;
/* Count the leading run of live banked registers; stop at the first one
   that is not live.  */
7284 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7285 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7286 count++;
7287 else
7288 break;
7290 /* Use movml when all banked registers are popped. */
7291 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7292 use_movml = true;
7295 if (sh_cfun_resbank_handler_p ())
7296 ; /* Do nothing. */
7297 else if (use_movml)
7299 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7301 /* We must avoid scheduling the multiple-load insn together with
7302 other insns. */
7303 emit_insn (gen_blockage ());
7304 emit_insn (gen_movml_pop_banked (sp_reg));
7305 emit_insn (gen_blockage ());
7307 else
7308 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7309 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7310 pop (i);
/* Shorten the generic loop below so that it stops just above
   LAST_BANKED_REG; the banked registers were dealt with above.  */
7312 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7314 else
7315 last_reg = FIRST_PSEUDO_REGISTER;
/* Walk the hard registers from the highest number downwards.  */
7317 for (int i = 0; i < last_reg; i++)
7319 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7321 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7322 && hard_reg_set_intersect_p (live_regs_mask,
7323 reg_class_contents[DF_REGS]))
7324 fpscr_deferred = true;
7325 /* For an ISR with RESBANK attribute assigned, don't pop
7326 following registers, R0-R14, MACH, MACL and GBR. */
7327 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7328 && ! (sh_cfun_resbank_handler_p ()
7329 && ((j >= FIRST_GENERAL_REG
7330 && j < LAST_GENERAL_REG)
7331 || j == MACH_REG
7332 || j == MACL_REG
7333 || j == GBR_REG)))
7334 pop (j);
/* A deferred FPSCR pop is emitted once the walk reaches FIRST_FP_REG.  */
7336 if (j == FIRST_FP_REG && fpscr_deferred)
7337 pop (FPSCR_REG);
7340 if (target_flags != save_flags && ! current_function_interrupt)
7341 emit_insn (gen_toggle_sz ());
7342 target_flags = save_flags;
7344 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7345 stack_pointer_rtx, e, NULL, true);
7347 if (crtl->calls_eh_return)
7348 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7349 EH_RETURN_STACKADJ_RTX));
7351 /* Switch back to the normal stack if necessary. */
7352 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7353 emit_insn (gen_sp_switch_2 ());
7355 /* Tell flow the insn that pops PR isn't dead. */
7356 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7357 emit_use (gen_rtx_REG (SImode, PR_REG));
7360 /* Emit code to change the current function's return address to RA.
7361 TEMP is available as a scratch register, if needed. */
7362 void
7363 sh_set_return_address (rtx ra, rtx tmp)
7365 HARD_REG_SET live_regs_mask;
7366 int d = calc_live_regs (&live_regs_mask);
7368 /* If pr_reg isn't life, we can set it directly. */
7369 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7371 rtx rr = gen_rtx_REG (SImode, PR_REG);
7372 emit_insn (GEN_MOV (rr, ra));
7373 /* Tell flow the register for return isn't dead. */
7374 emit_use (rr);
7375 return;
7378 int pr_offset = rounded_frame_size (d);
7380 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7382 if (frame_pointer_needed)
7383 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7384 else
7385 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7387 tmp = gen_frame_mem (Pmode, tmp);
7388 emit_insn (GEN_MOV (tmp, ra));
7389 /* Tell this store isn't dead. */
7390 emit_use (tmp);
/* Clear variables at function end.  There is currently nothing to
   clear, so the body is intentionally empty and the FILE argument is
   left unnamed.  */
static void
sh_output_function_epilogue (FILE *)
{
}
/* Expand a call to __builtin_saveregs: allocate a stack buffer, store the
   unnamed integer and SFmode/DFmode argument registers into it and return
   the buffer's address as an rtx.  The float registers occupy the first
   n_floatregs words of the buffer and the integer registers follow them.
   Reports an error and returns const0_rtx when !TARGET_FPU_ANY.  */
7399 static rtx
7400 sh_builtin_saveregs (void)
7402 /* First unnamed integer register. */
7403 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7404 /* Number of integer registers we need to save. */
7405 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7406 /* First unnamed SFmode float reg */
7407 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7408 /* Number of SFmode float regs to save. */
7409 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7410 rtx regbuf, fpregs;
7411 int bufsize, regno;
7412 alias_set_type alias_set;
7414 if (!TARGET_FPU_ANY)
7416 error ("__builtin_saveregs not supported by this subtarget");
7417 return const0_rtx;
7420 /* Allocate block of memory for the regs. */
7421 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7422 Or can assign_stack_local accept a 0 SIZE argument? */
7423 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
/* Odd number of float regs: allocate one extra word and OR
   UNITS_PER_WORD into the buffer address.  */
7425 if (n_floatregs & 1)
7427 rtx addr;
7429 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7430 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7431 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7432 regbuf = change_address (regbuf, BLKmode, addr);
/* Stack boundary below 64 bits with a double-precision FPU: allocate one
   extra word, add 4 to the address and mask it with -8.  */
7434 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7436 rtx addr, mask;
7438 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7439 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7440 XEXP (regbuf, 0), 4));
7441 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7442 emit_insn (gen_andsi3 (addr, addr, mask));
7443 regbuf = change_address (regbuf, BLKmode, addr);
7445 else
7446 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7447 alias_set = get_varargs_alias_set ();
7448 set_mem_alias_set (regbuf, alias_set);
7450 /* Save int args.
7451 This is optimized to only save the regs that are necessary. Explicitly
7452 named args need not be saved. */
7453 if (n_intregs > 0)
7454 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7455 adjust_address (regbuf, BLKmode,
7456 n_floatregs * UNITS_PER_WORD),
7457 n_intregs);
7459 /* Save float args.
7460 This is optimized to only save the regs that are necessary. Explicitly
7461 named args need not be saved.
7462 We explicitly build a pointer to the buffer because it halves the insn
7463 count when not optimizing (otherwise the pointer is built for each reg
7464 saved).
7465 We emit the moves in reverse order so that we can use predecrement. */
/* FPREGS starts just past the float area and is decremented before each
   store below.  */
7467 fpregs = copy_to_mode_reg (Pmode,
7468 plus_constant (Pmode, XEXP (regbuf, 0),
7469 n_floatregs * UNITS_PER_WORD));
7470 if (TARGET_FPU_DOUBLE)
7472 rtx mem;
/* Store register pairs as DFmode values, two words at a time.  */
7473 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7475 emit_insn (gen_addsi3 (fpregs, fpregs,
7476 GEN_INT (-2 * UNITS_PER_WORD)));
7477 mem = change_address (regbuf, DFmode, fpregs);
7478 emit_move_insn (mem,
7479 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
/* If the first unnamed float register has an odd index, it is stored
   on its own as an SFmode value.  */
7481 regno = first_floatreg;
7482 if (regno & 1)
7484 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7485 mem = change_address (regbuf, SFmode, fpregs);
7486 emit_move_insn (mem,
7487 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7488 + regno - SH_REG_MSW_OFFSET));
7491 else
7492 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7494 rtx mem;
7496 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7497 mem = change_address (regbuf, SFmode, fpregs);
7498 emit_move_insn (mem,
7499 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7502 /* Return the address of the regbuf. */
7503 return XEXP (regbuf, 0);
7506 /* Define the `__builtin_va_list' type for the ABI. */
7507 static tree
7508 sh_build_builtin_va_list (void)
7510 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7511 tree record, type_decl;
7513 if ((! TARGET_SH2E && ! TARGET_SH4)
7514 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7515 return ptr_type_node;
7517 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7518 type_decl = build_decl (BUILTINS_LOCATION,
7519 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7521 f_next_o = build_decl (BUILTINS_LOCATION,
7522 FIELD_DECL, get_identifier ("__va_next_o"),
7523 ptr_type_node);
7524 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7525 FIELD_DECL,
7526 get_identifier ("__va_next_o_limit"),
7527 ptr_type_node);
7528 f_next_fp = build_decl (BUILTINS_LOCATION,
7529 FIELD_DECL, get_identifier ("__va_next_fp"),
7530 ptr_type_node);
7531 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7532 FIELD_DECL,
7533 get_identifier ("__va_next_fp_limit"),
7534 ptr_type_node);
7535 f_next_stack = build_decl (BUILTINS_LOCATION,
7536 FIELD_DECL, get_identifier ("__va_next_stack"),
7537 ptr_type_node);
7539 DECL_FIELD_CONTEXT (f_next_o) = record;
7540 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7541 DECL_FIELD_CONTEXT (f_next_fp) = record;
7542 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7543 DECL_FIELD_CONTEXT (f_next_stack) = record;
7545 TYPE_STUB_DECL (record) = type_decl;
7546 TYPE_NAME (record) = type_decl;
7547 TYPE_FIELDS (record) = f_next_o;
7548 DECL_CHAIN (f_next_o) = f_next_o_limit;
7549 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7550 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7551 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7553 layout_type (record);
7555 return record;
7558 /* Implement `va_start' for varargs and stdarg. */
7559 static void
7560 sh_va_start (tree valist, rtx nextarg)
7562 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7563 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7564 tree t, u;
7565 int nfp, nint;
7567 if ((! TARGET_SH2E && ! TARGET_SH4)
7568 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7570 std_expand_builtin_va_start (valist, nextarg);
7571 return;
7574 f_next_o = TYPE_FIELDS (va_list_type_node);
7575 f_next_o_limit = DECL_CHAIN (f_next_o);
7576 f_next_fp = DECL_CHAIN (f_next_o_limit);
7577 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7578 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7580 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7581 NULL_TREE);
7582 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7583 valist, f_next_o_limit, NULL_TREE);
7584 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7585 NULL_TREE);
7586 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7587 valist, f_next_fp_limit, NULL_TREE);
7588 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7589 valist, f_next_stack, NULL_TREE);
7591 /* Call __builtin_saveregs. */
7592 u = make_tree (sizetype, expand_builtin_saveregs ());
7593 u = fold_convert (ptr_type_node, u);
7594 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7595 TREE_SIDE_EFFECTS (t) = 1;
7596 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7598 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7599 if (nfp < 8)
7600 nfp = 8 - nfp;
7601 else
7602 nfp = 0;
7603 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7604 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7605 TREE_SIDE_EFFECTS (t) = 1;
7606 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7608 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7609 TREE_SIDE_EFFECTS (t) = 1;
7610 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7612 nint = crtl->args.info.arg_count[SH_ARG_INT];
7613 if (nint < 4)
7614 nint = 4 - nint;
7615 else
7616 nint = 0;
7617 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7618 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7619 TREE_SIDE_EFFECTS (t) = 1;
7620 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7622 u = make_tree (ptr_type_node, nextarg);
7623 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7624 TREE_SIDE_EFFECTS (t) = 1;
7625 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7628 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7629 member, return it. */
7630 static tree
7631 find_sole_member (tree type)
7633 tree field, member = NULL_TREE;
7635 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7637 if (TREE_CODE (field) != FIELD_DECL)
7638 continue;
7639 if (!DECL_SIZE (field))
7640 return NULL_TREE;
7641 if (integer_zerop (DECL_SIZE (field)))
7642 continue;
7643 if (member)
7644 return NULL_TREE;
7645 member = field;
7647 return member;
7650 /* Implement `va_arg'.
   VALIST is the va_list expression and TYPE the type being fetched.
   Statements are appended to PRE_P; returns a tree for the fetched value
   (an indirect reference when the argument is passed by reference).
   NOTE(review): POST_P is marked ATTRIBUTE_UNUSED but is written on the
   pass_as_float path below.  */
7651 static tree
7652 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7653 gimple_seq *post_p ATTRIBUTE_UNUSED)
7655 tree tmp;
7656 tree addr, lab_over = NULL, result = NULL;
7657 tree eff_type;
7659 const bool pass_by_ref =
7660 !VOID_TYPE_P (type)
7661 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
/* A by-reference argument is fetched as a pointer and dereferenced at
   the very end.  */
7663 if (pass_by_ref)
7664 type = build_pointer_type (type);
/* RSIZE is SIZE rounded up to a multiple of UNITS_PER_WORD.  */
7666 HOST_WIDE_INT size = int_size_in_bytes (type);
7667 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7668 tree pptr_type_node = build_pointer_type (ptr_type_node);
7670 if ((TARGET_SH2E || TARGET_SH4)
7671 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7673 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7674 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7675 tree lab_false;
7676 tree member;
7678 f_next_o = TYPE_FIELDS (va_list_type_node);
7679 f_next_o_limit = DECL_CHAIN (f_next_o);
7680 f_next_fp = DECL_CHAIN (f_next_o_limit);
7681 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7682 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7684 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7685 NULL_TREE);
7686 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7687 valist, f_next_o_limit, NULL_TREE);
7688 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7689 valist, f_next_fp, NULL_TREE);
7690 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7691 valist, f_next_fp_limit, NULL_TREE);
7692 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7693 valist, f_next_stack, NULL_TREE);
7695 /* Structures with a single member with a distinct mode are passed
7696 like their member. This is relevant if the latter has a REAL_TYPE
7697 or COMPLEX_TYPE type. */
7698 eff_type = type;
7699 while (TREE_CODE (eff_type) == RECORD_TYPE
7700 && (member = find_sole_member (eff_type))
7701 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7702 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7703 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7705 tree field_type = TREE_TYPE (member);
7707 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7708 eff_type = field_type;
7709 else
7711 gcc_assert ((TYPE_ALIGN (eff_type)
7712 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7713 || (TYPE_ALIGN (eff_type)
7714 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7715 break;
/* Decide whether the value is taken from the floating-point save
   area.  */
7719 bool pass_as_float;
7720 if (TARGET_FPU_DOUBLE)
7722 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7723 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7724 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7725 && size <= 16));
7727 else
7729 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
/* ADDR holds the address of the va_list field to fetch through; VALIST
   is rebound to a dereference of ADDR.  */
7732 addr = create_tmp_var (pptr_type_node);
7733 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7734 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7736 valist = build_simple_mem_ref (addr);
7738 if (pass_as_float)
7740 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7741 tree cmp;
7742 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7744 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7745 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7747 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7748 tmp = next_fp_limit;
7749 if (size > 4 && !is_double)
7750 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7751 tmp = build2 (GE_EXPR, boolean_type_node,
7752 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7753 cmp = build3 (COND_EXPR, void_type_node, tmp,
7754 build1 (GOTO_EXPR, void_type_node,
7755 unshare_expr (lab_false)), NULL_TREE);
/* For doubles the limit check is emitted after the alignment adjustment
   below; for everything else it is emitted here.  */
7756 if (!is_double)
7757 gimplify_and_add (cmp, pre_p);
7759 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7760 || (is_double || size == 16))
7762 tmp = fold_convert (sizetype, next_fp_tmp);
7763 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7764 size_int (UNITS_PER_WORD));
7765 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7766 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7768 if (is_double)
7769 gimplify_and_add (cmp, pre_p);
7771 #ifdef FUNCTION_ARG_SCmode_WART
7772 if (TYPE_MODE (eff_type) == SCmode
7773 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7775 tree subtype = TREE_TYPE (eff_type);
7776 tree real, imag;
7778 imag
7779 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7780 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7782 real
7783 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7784 real = get_initialized_tmp_var (real, pre_p, NULL);
7786 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7787 if (type != eff_type)
7788 result = build1 (VIEW_CONVERT_EXPR, type, result);
7789 result = get_initialized_tmp_var (result, pre_p, NULL);
7791 #endif /* FUNCTION_ARG_SCmode_WART */
7793 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7794 gimplify_and_add (tmp, pre_p);
7796 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7797 gimplify_and_add (tmp, pre_p);
7799 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7800 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7801 gimplify_assign (unshare_expr (next_fp_tmp),
7802 unshare_expr (valist), pre_p);
/* Write the advanced pointer back through ADDR after the value has been
   fetched; this is the one use of POST_P.  */
7804 gimplify_assign (unshare_expr (valist),
7805 unshare_expr (next_fp_tmp), post_p);
7806 valist = next_fp_tmp;
7808 else
/* Integer path: fall back to the stack when next_o + rsize would exceed
   next_o_limit.  */
7810 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7811 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7812 unshare_expr (next_o_limit));
7813 tmp = build3 (COND_EXPR, void_type_node, tmp,
7814 build1 (GOTO_EXPR, void_type_node,
7815 unshare_expr (lab_false)),
7816 NULL_TREE);
7817 gimplify_and_add (tmp, pre_p);
7819 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7820 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7822 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7823 gimplify_and_add (tmp, pre_p);
7825 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7826 gimplify_and_add (tmp, pre_p);
7828 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7829 gimplify_assign (unshare_expr (next_o),
7830 unshare_expr (next_o_limit), pre_p);
7832 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7833 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7836 if (!result)
7838 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7839 gimplify_and_add (tmp, pre_p);
7843 /* ??? In va-sh.h, there had been code to make values larger than
7844 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7846 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
/* RESULT is only set beforehand on the SCmode path above; in that case
   the lab_over label is emitted here instead of earlier.  */
7847 if (result)
7849 gimplify_assign (result, tmp, pre_p);
7850 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7851 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7852 gimplify_and_add (tmp, pre_p);
7854 else
7855 result = tmp;
7857 if (pass_by_ref)
7858 result = build_va_arg_indirect_ref (result);
7860 return result;
7863 /* 64 bit floating points memory transfers are paired single precision loads
7864 or store. So DWARF information needs fixing in little endian (unless
7865 PR=SZ=1 in FPSCR). */
7867 sh_dwarf_register_span (rtx reg)
7869 unsigned regno = REGNO (reg);
7871 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7872 return NULL_RTX;
7874 return
7875 gen_rtx_PARALLEL (VOIDmode,
7876 gen_rtvec (2,
7877 gen_rtx_REG (SFmode, regno + 1),
7878 gen_rtx_REG (SFmode, regno)));
7881 static machine_mode
7882 sh_promote_function_mode (const_tree type, machine_mode mode,
7883 int *punsignedp, const_tree funtype,
7884 int for_return)
7886 if (sh_promote_prototypes (funtype))
7887 return promote_mode (type, mode, punsignedp);
7888 else
7889 return default_promote_function_mode (type, mode, punsignedp, funtype,
7890 for_return);
7893 static bool
7894 sh_promote_prototypes (const_tree type)
7896 if (TARGET_HITACHI)
7897 return false;
7898 if (! type)
7899 return true;
7900 return ! sh_attr_renesas_p (type);
7903 static bool
7904 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7905 const_tree type, bool named ATTRIBUTE_UNUSED)
7907 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7909 if (targetm.calls.must_pass_in_stack (mode, type))
7910 return true;
7912 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7913 wants to know about pass-by-reference semantics for incoming
7914 arguments. */
7915 if (! cum)
7916 return false;
7918 return false;
7921 static bool
7922 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7923 const_tree type, bool named ATTRIBUTE_UNUSED)
7925 /* ??? How can it possibly be correct to return true only on the
7926 caller side of the equation? Is there someplace else in the
7927 sh backend that's magically producing the copies? */
7928 return (get_cumulative_args (cum)->outgoing
7929 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7930 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7933 static sh_arg_class
7934 get_sh_arg_class (machine_mode mode)
7936 if (TARGET_FPU_ANY && mode == SFmode)
7937 return SH_ARG_FLOAT;
7939 if (TARGET_FPU_DOUBLE
7940 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7941 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7942 return SH_ARG_FLOAT;
7944 return SH_ARG_INT;
7947 /* Round a register number up to a proper boundary for an arg of mode
7948 MODE.
7949 The SH doesn't care about double alignment, so we only
7950 round doubles to even regs when asked to explicitly. */
7951 static int
7952 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7954 /* FIXME: This used to be a macro and has been copy pasted into this
7955 function as is. Make this more readable. */
7956 return
7957 (((TARGET_ALIGN_DOUBLE
7958 || (TARGET_FPU_DOUBLE
7959 && (mode == DFmode || mode == DCmode)
7960 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7961 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7962 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7963 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7964 : cum.arg_count[(int) get_sh_arg_class (mode)]);
7967 /* Return true if arg of the specified mode should be passed in a register
7968 or false otherwise. */
7969 static bool
7970 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7971 const_tree type)
7973 /* FIXME: This used to be a macro and has been copy pasted into this
7974 function as is. Make this more readable. */
7975 return
7976 ((type == 0
7977 || (! TREE_ADDRESSABLE (type)
7978 && (! (TARGET_HITACHI || cum.renesas_abi)
7979 || ! (AGGREGATE_TYPE_P (type)
7980 || (!TARGET_FPU_ANY
7981 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7982 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7983 && ! cum.force_mem
7984 && (TARGET_SH2E
7985 ? ((mode) == BLKmode
7986 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7987 + int_size_in_bytes (type))
7988 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7989 : ((sh_round_reg (cum, mode)
7990 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
7991 <= NPARM_REGS (mode)))
7992 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7995 static int
7996 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
7997 tree type, bool named ATTRIBUTE_UNUSED)
7999 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
8000 int words = 0;
8002 if (sh_pass_in_reg_p (*cum, mode, type)
8003 && !TARGET_FPU_DOUBLE
8004 && (sh_round_reg (*cum, mode)
8005 + (mode != BLKmode
8006 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8007 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8008 > NPARM_REGS (mode)))
8009 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8011 return words * UNITS_PER_WORD;
8015 /* Define where to put the arguments to a function.
8016 Value is zero to push the argument on the stack,
8017 or a hard register in which to store the argument.
8019 MODE is the argument's machine mode.
8020 TYPE is the data type of the argument (as a tree).
8021 This is null for libcalls where that information may
8022 not be available.
8023 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8024 the preceding args and about the function being called.
8025 NAMED is nonzero if this argument is a named parameter
8026 (otherwise it is an extra parameter matching an ellipsis).
8028 On SH the first args are normally in registers
8029 and the rest are pushed. Any arg that starts within the first
8030 NPARM_REGS words is at least partially passed in a register unless
8031 its data type forbids. */
8032 static rtx
8033 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
8034 const_tree type, bool named)
8036 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8038 if (mode == VOIDmode)
8039 return ca->renesas_abi ? const1_rtx : const0_rtx;
8041 if (sh_pass_in_reg_p (*ca, mode, type)
8042 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8044 int regno;
8046 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8047 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8049 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8050 gen_rtx_REG (SFmode,
8051 BASE_ARG_REG (mode)
8052 + (sh_round_reg (*ca, mode) ^ 1)),
8053 const0_rtx);
8054 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8055 gen_rtx_REG (SFmode,
8056 BASE_ARG_REG (mode)
8057 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8058 GEN_INT (4));
8059 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8062 /* If the alignment of a DF value causes an SF register to be
8063 skipped, we will use that skipped register for the next SF
8064 value. */
8065 if ((TARGET_HITACHI || ca->renesas_abi)
8066 && ca->free_single_fp_reg
8067 && mode == SFmode)
8068 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8070 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8071 ^ (mode == SFmode && TARGET_SH4
8072 && TARGET_LITTLE_ENDIAN
8073 && ! TARGET_HITACHI && ! ca->renesas_abi);
8074 return gen_rtx_REG (mode, regno);
8078 return NULL_RTX;
8081 /* Update the data in CUM to advance over an argument
8082 of mode MODE and data type TYPE.
8083 (TYPE is null for libcalls where that information may not be
8084 available.) */
8085 static void
8086 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
8087 const_tree type, bool named ATTRIBUTE_UNUSED)
8089 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8091 if (ca->force_mem)
8092 ca->force_mem = false;
8094 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8096 /* Note that we've used the skipped register. */
8097 if (mode == SFmode && ca->free_single_fp_reg)
8099 ca->free_single_fp_reg = 0;
8100 return;
8102 /* When we have a DF after an SF, there's an SF register that get
8103 skipped in order to align the DF value. We note this skipped
8104 register, because the next SF value will use it, and not the
8105 SF that follows the DF. */
8106 if (mode == DFmode
8107 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8109 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8110 + BASE_ARG_REG (mode));
8114 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8115 || sh_pass_in_reg_p (*ca, mode, type))
8116 (ca->arg_count[(int) get_sh_arg_class (mode)]
8117 = (sh_round_reg (*ca, mode)
8118 + (mode == BLKmode
8119 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8120 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
8123 /* The Renesas calling convention doesn't quite fit into this scheme since
8124 the address is passed like an invisible argument, but one that is always
8125 passed in memory. */
8126 static rtx
8127 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8129 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8130 return NULL_RTX;
8131 return gen_rtx_REG (Pmode, 2);
8134 /* Worker function for TARGET_FUNCTION_VALUE.
8136 For the SH, this is like LIBCALL_VALUE, except that we must change the
8137 mode like PROMOTE_MODE does.
8138 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8139 tested here has to be kept in sync with the one in
8140 explow.c:promote_mode. */
8141 static rtx
8142 sh_function_value (const_tree valtype,
8143 const_tree fn_decl_or_type,
8144 bool outgoing ATTRIBUTE_UNUSED)
8146 if (fn_decl_or_type
8147 && !DECL_P (fn_decl_or_type))
8148 fn_decl_or_type = NULL;
8150 return gen_rtx_REG (
8151 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8152 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8153 && (TREE_CODE (valtype) == INTEGER_TYPE
8154 || TREE_CODE (valtype) == ENUMERAL_TYPE
8155 || TREE_CODE (valtype) == BOOLEAN_TYPE
8156 || TREE_CODE (valtype) == REAL_TYPE
8157 || TREE_CODE (valtype) == OFFSET_TYPE))
8158 && sh_promote_prototypes (fn_decl_or_type)
8159 ? SImode : TYPE_MODE (valtype)),
8160 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8163 /* Worker function for TARGET_LIBCALL_VALUE. */
8164 static rtx
8165 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8167 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8170 /* Return true if N is a possible register number of function value. */
8171 static bool
8172 sh_function_value_regno_p (const unsigned int regno)
8174 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8177 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8178 static bool
8179 sh_return_in_memory (const_tree type, const_tree fndecl)
8181 return TYPE_MODE (type) == BLKmode
8182 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8183 && TREE_CODE (type) == RECORD_TYPE);
8186 /* We actually emit the code in sh_expand_prologue. We used to use
8187 a static variable to flag that we need to emit this code, but that
8188 doesn't when inlining, when functions are deferred and then emitted
8189 later. Fortunately, we already have two flags that are part of struct
8190 function that tell if a function uses varargs or stdarg. */
8191 static void
8192 sh_setup_incoming_varargs (cumulative_args_t ca,
8193 machine_mode mode,
8194 tree type,
8195 int *pretend_arg_size,
8196 int second_time ATTRIBUTE_UNUSED)
8198 gcc_assert (cfun->stdarg);
8199 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8201 int named_parm_regs, anon_parm_regs;
8203 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
8204 + (mode == BLKmode
8205 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8206 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
8207 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8208 if (anon_parm_regs > 0)
8209 *pretend_arg_size = anon_parm_regs * 4;
8213 static bool
8214 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8216 return false;
8219 static bool
8220 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8222 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8224 return ! (TARGET_HITACHI || ca->renesas_abi);
8228 /* Define the offset between two registers, one to be eliminated, and
8229 the other its replacement, at the start of a routine. */
8231 initial_elimination_offset (int from, int to)
8233 const int regs_saved_rounding = 0;
8234 int save_flags = target_flags;
8235 HARD_REG_SET live_regs_mask;
8237 int regs_saved = calc_live_regs (&live_regs_mask);
8239 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8240 target_flags = save_flags;
8242 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8244 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8245 return total_saved_regs_space + total_auto_space;
8247 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8248 return total_saved_regs_space + total_auto_space;
8250 /* Initial gap between fp and sp is 0. */
8251 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8252 return 0;
8254 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8255 return rounded_frame_size (0);
8257 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8258 return rounded_frame_size (0);
8260 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8261 && (to == HARD_FRAME_POINTER_REGNUM
8262 || to == STACK_POINTER_REGNUM));
8263 return total_auto_space;
8266 /* Parse the -mfixed-range= option string. */
8267 void
8268 sh_fix_range (const char *const_str)
8270 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8271 REG2 are either register names or register numbers. The effect
8272 of this option is to mark the registers in the range from REG1 to
8273 REG2 as ``fixed'' so they won't be used by the compiler. */
8275 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8277 while (1)
8279 char* dash = strchr (str, '-');
8280 if (!dash)
8282 warning (0, "value of -mfixed-range must have form REG1-REG2");
8283 return;
8285 *dash = '\0';
8286 char* comma = strchr (dash + 1, ',');
8287 if (comma)
8288 *comma = '\0';
8290 int first = decode_reg_name (str);
8291 if (first < 0)
8293 warning (0, "unknown register name: %s", str);
8294 return;
8297 int last = decode_reg_name (dash + 1);
8298 if (last < 0)
8300 warning (0, "unknown register name: %s", dash + 1);
8301 return;
8304 *dash = '-';
8306 if (first > last)
8308 warning (0, "%s-%s is an empty range", str, dash + 1);
8309 return;
8312 for (int i = first; i <= last; ++i)
8313 fixed_regs[i] = call_used_regs[i] = 1;
8315 if (!comma)
8316 break;
8318 *comma = ',';
8319 str = comma + 1;
8323 /* Insert any deferred function attributes from earlier pragmas. */
8324 static void
8325 sh_insert_attributes (tree node, tree *attributes)
8327 if (TREE_CODE (node) != FUNCTION_DECL)
8328 return;
8330 /* We are only interested in fields. */
8331 if (!DECL_P (node))
8332 return;
8334 /* Append the attributes to the deferred attributes. */
8335 *sh_deferred_function_attributes_tail = *attributes;
8336 tree attrs = sh_deferred_function_attributes;
8337 if (!attrs)
8338 return;
8340 /* Some attributes imply or require the interrupt attribute. */
8341 if (!lookup_attribute ("interrupt_handler", attrs)
8342 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8344 /* If we have a trapa_handler, but no interrupt_handler attribute,
8345 insert an interrupt_handler attribute. */
8346 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8347 /* We can't use sh_pr_interrupt here because that's not in the
8348 java frontend. */
8349 attrs
8350 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8351 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8352 if the interrupt attribute is missing, we ignore the attribute
8353 and warn. */
8354 else if (lookup_attribute ("sp_switch", attrs)
8355 || lookup_attribute ("trap_exit", attrs)
8356 || lookup_attribute ("nosave_low_regs", attrs)
8357 || lookup_attribute ("resbank", attrs))
8359 tree *tail;
8361 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8363 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8364 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8365 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8366 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8367 warning (OPT_Wattributes,
8368 "%qE attribute only applies to interrupt functions",
8369 TREE_PURPOSE (attrs));
8370 else
8372 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8373 NULL_TREE);
8374 tail = &TREE_CHAIN (*tail);
8377 attrs = *attributes;
8381 /* Install the processed list. */
8382 *attributes = attrs;
8384 /* Clear deferred attributes. */
8385 sh_deferred_function_attributes = NULL_TREE;
8386 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8388 return;
8391 /*------------------------------------------------------------------------------
8392 Target specific attributes
8393 Supported attributes are:
8395 * interrupt_handler
8396 Specifies this function is an interrupt handler.
8398 * trapa_handler
8399 Like interrupt_handler, but don't save all registers.
8401 * sp_switch
8402 Specifies an alternate stack for an interrupt handler to run on.
8404 * trap_exit
8405 Use a trapa to exit an interrupt function instead of rte.
8407 * nosave_low_regs
8408 Don't save r0..r7 in an interrupt handler function.
8409 This is useful on SH3* and SH4*, which have a separate set of low
8410 regs for user and privileged modes.
8411 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8412 those that run with interrupts disabled and thus can't be
8413 interrupted themselves).
8415 * renesas
8416 Use Renesas calling/layout conventions (functions and structures).
8418 * resbank
8419 In case of an interrupt handler function, use a register bank to
8420 save registers R0-R14, MACH, MACL, GBR and PR.
8421 This is available only on SH2A targets.
8423 * function_vector
8424 Declares a function to be called using the TBR relative addressing
8425 mode. Takes an argument that specifies the slot number in the table
8426 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
8429 /* Handle a 'resbank' attribute. */
8430 static tree
8431 sh_handle_resbank_handler_attribute (tree * node, tree name,
8432 tree args ATTRIBUTE_UNUSED,
8433 int flags ATTRIBUTE_UNUSED,
8434 bool * no_add_attrs)
8436 if (!TARGET_SH2A)
8438 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8439 name);
8440 *no_add_attrs = true;
8442 if (TREE_CODE (*node) != FUNCTION_DECL)
8444 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8445 name);
8446 *no_add_attrs = true;
8449 return NULL_TREE;
8452 /* Handle an "interrupt_handler" attribute; arguments as in
8453 struct attribute_spec.handler. */
8454 static tree
8455 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8456 tree args ATTRIBUTE_UNUSED,
8457 int flags ATTRIBUTE_UNUSED,
8458 bool *no_add_attrs)
8460 if (TREE_CODE (*node) != FUNCTION_DECL)
8462 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8463 name);
8464 *no_add_attrs = true;
8467 return NULL_TREE;
8470 /* Handle an 'function_vector' attribute; arguments as in
8471 struct attribute_spec.handler. */
8472 static tree
8473 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8474 tree args ATTRIBUTE_UNUSED,
8475 int flags ATTRIBUTE_UNUSED,
8476 bool * no_add_attrs)
8478 if (!TARGET_SH2A)
8480 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8481 name);
8482 *no_add_attrs = true;
8484 else if (TREE_CODE (*node) != FUNCTION_DECL)
8486 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8487 name);
8488 *no_add_attrs = true;
8490 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8492 /* The argument must be a constant integer. */
8493 warning (OPT_Wattributes,
8494 "%qE attribute argument not an integer constant",
8495 name);
8496 *no_add_attrs = true;
8498 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8500 /* The argument value must be between 0 to 255. */
8501 warning (OPT_Wattributes,
8502 "%qE attribute argument should be between 0 to 255",
8503 name);
8504 *no_add_attrs = true;
8506 return NULL_TREE;
8509 /* Returns true if current function has been assigned the attribute
8510 'function_vector'. */
8511 bool
8512 sh2a_is_function_vector_call (rtx x)
8514 if (GET_CODE (x) == SYMBOL_REF
8515 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8517 tree tr = SYMBOL_REF_DECL (x);
8519 if (sh2a_function_vector_p (tr))
8520 return true;
8523 return false;
8526 /* Returns the function vector number, if the attribute
8527 'function_vector' is assigned, otherwise returns zero. */
8529 sh2a_get_function_vector_number (rtx x)
8531 if ((GET_CODE (x) == SYMBOL_REF)
8532 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8534 tree t = SYMBOL_REF_DECL (x);
8536 if (TREE_CODE (t) != FUNCTION_DECL)
8537 return 0;
8539 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8540 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8541 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8543 return 0;
8545 else
8546 return 0;
8549 /* Handle an "sp_switch" attribute; arguments as in
8550 struct attribute_spec.handler. */
8551 static tree
8552 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8553 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8555 if (TREE_CODE (*node) != FUNCTION_DECL)
8557 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8558 name);
8559 *no_add_attrs = true;
8561 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8563 /* The argument must be a constant string. */
8564 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8565 name);
8566 *no_add_attrs = true;
8569 return NULL_TREE;
8572 /* Handle an "trap_exit" attribute; arguments as in
8573 struct attribute_spec.handler. */
8574 static tree
8575 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8576 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8578 if (TREE_CODE (*node) != FUNCTION_DECL)
8580 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8581 name);
8582 *no_add_attrs = true;
8584 /* The argument specifies a trap number to be used in a trapa instruction
8585 at function exit (instead of an rte instruction). */
8586 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8588 /* The argument must be a constant integer. */
8589 warning (OPT_Wattributes, "%qE attribute argument not an "
8590 "integer constant", name);
8591 *no_add_attrs = true;
8594 return NULL_TREE;
8597 static tree
8598 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8599 tree name ATTRIBUTE_UNUSED,
8600 tree args ATTRIBUTE_UNUSED,
8601 int flags ATTRIBUTE_UNUSED,
8602 bool *no_add_attrs ATTRIBUTE_UNUSED)
8604 return NULL_TREE;
8607 /* True if __attribute__((renesas)) or -mrenesas. */
8608 bool
8609 sh_attr_renesas_p (const_tree td)
8611 if (TARGET_HITACHI)
8612 return true;
8613 if (td == NULL_TREE)
8614 return false;
8615 if (DECL_P (td))
8616 td = TREE_TYPE (td);
8617 if (td == error_mark_node)
8618 return false;
8619 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8622 /* True if __attribute__((renesas)) or -mrenesas, for the current
8623 function. */
8624 bool
8625 sh_cfun_attr_renesas_p (void)
8627 return sh_attr_renesas_p (current_function_decl);
8630 /* Returns true if the current function has the "interrupt_handler"
8631 attribute set. */
8632 bool
8633 sh_cfun_interrupt_handler_p (void)
8635 return (lookup_attribute ("interrupt_handler",
8636 DECL_ATTRIBUTES (current_function_decl))
8637 != NULL_TREE);
8640 /* Returns true if FUNC has been assigned the attribute
8641 "function_vector". */
8642 bool
8643 sh2a_function_vector_p (tree func)
8645 if (TREE_CODE (func) != FUNCTION_DECL)
8646 return false;
8648 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8649 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8650 return true;
8652 return false;
8655 /* Returns true if given tree has the "resbank" attribute set. */
8656 bool
8657 sh_cfun_resbank_handler_p (void)
8659 return ((lookup_attribute ("resbank",
8660 DECL_ATTRIBUTES (current_function_decl))
8661 != NULL_TREE)
8662 && (lookup_attribute ("interrupt_handler",
8663 DECL_ATTRIBUTES (current_function_decl))
8664 != NULL_TREE) && TARGET_SH2A);
8667 /* Returns true if the current function has a "trap_exit" attribute set. */
8668 bool
8669 sh_cfun_trap_exit_p (void)
8671 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8672 != NULL_TREE;
8675 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8676 static const char *
8677 sh_check_pch_target_flags (int old_flags)
8679 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8680 | MASK_SH_E | MASK_HARD_SH4
8681 | MASK_FPU_SINGLE | MASK_SH4))
8682 return _("created and used with different architectures / ABIs");
8683 if ((old_flags ^ target_flags) & MASK_HITACHI)
8684 return _("created and used with different ABIs");
8685 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8686 return _("created and used with different endianness");
8687 return NULL;
8690 /* Predicates used by the templates. */
8692 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8693 Used only in general_movsrc_operand. */
8694 bool
8695 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8697 switch (REGNO (op))
8699 case PR_REG:
8700 case MACL_REG:
8701 case MACH_REG:
8702 return true;
8704 return false;
8707 /* Returns true if OP is a floating point value with value 0.0. */
8708 bool
8709 fp_zero_operand (rtx op)
8711 if (GET_MODE (op) != SFmode)
8712 return false;
8714 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8715 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8718 /* Returns true if OP is a floating point value with value 1.0. */
8719 bool
8720 fp_one_operand (rtx op)
8722 if (GET_MODE (op) != SFmode)
8723 return false;
8725 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8728 /* Return the TLS type for TLS symbols. */
8729 enum tls_model
8730 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8732 if (GET_CODE (op) != SYMBOL_REF)
8733 return TLS_MODEL_NONE;
8734 return SYMBOL_REF_TLS_MODEL (op);
8737 /* Return the destination address of a branch. */
8738 static int
8739 branch_dest (rtx branch)
8741 rtx dest = SET_SRC (PATTERN (branch));
8743 if (GET_CODE (dest) == IF_THEN_ELSE)
8744 dest = XEXP (dest, 1);
8746 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8749 /* Return nonzero if REG is not used after INSN.
8750 We assume REG is a reload reg, and therefore does
8751 not live past labels. It may live past calls or jumps though. */
8752 bool
8753 reg_unused_after (rtx reg, rtx_insn *insn)
8755 /* If the reg is set by this instruction, then it is safe for our
8756 case. Disregard the case where this is a store to memory, since
8757 we are checking a register used in the store address. */
8758 rtx set = single_set (insn);
8759 if (set && !MEM_P (SET_DEST (set))
8760 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8761 return true;
8763 while ((insn = NEXT_INSN (insn)))
8765 if (!INSN_P (insn))
8766 continue;
8768 rtx_code code = GET_CODE (insn);
8770 #if 0
8771 /* If this is a label that existed before reload, then the register
8772 is dead here. However, if this is a label added by reorg, then
8773 the register may still be live here. We can't tell the difference,
8774 so we just ignore labels completely. */
8775 if (code == CODE_LABEL)
8776 return 1;
8777 /* else */
8778 #endif
8780 if (code == JUMP_INSN)
8781 return false;
8783 /* If this is a sequence, we must handle them all at once.
8784 We could have for instance a call that sets the target register,
8785 and an insn in a delay slot that uses the register. In this case,
8786 we must return 0. */
8787 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8789 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8790 bool retval = false;
8792 for (int i = 0; i < seq->len (); i++)
8794 rtx_insn *this_insn = seq->insn (i);
8795 rtx set = single_set (this_insn);
8797 if (CALL_P (this_insn))
8798 code = CALL_INSN;
8799 else if (JUMP_P (this_insn))
8801 if (INSN_ANNULLED_BRANCH_P (this_insn))
8802 return false;
8803 code = JUMP_INSN;
8806 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8807 return false;
8808 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8810 if (!MEM_P (SET_DEST (set)))
8811 retval = true;
8812 else
8813 return false;
8815 if (set == NULL_RTX
8816 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8817 return false;
8819 if (retval)
8820 return true;
8821 else if (code == JUMP_INSN)
8822 return false;
8825 rtx set = single_set (insn);
8826 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8827 return false;
8828 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8829 return !MEM_P (SET_DEST (set));
8830 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8831 return false;
8833 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8834 return true;
8836 return true;
8840 static GTY(()) rtx t_reg_rtx;
8842 get_t_reg_rtx (void)
8844 if (! t_reg_rtx)
8845 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8846 return t_reg_rtx;
8849 static GTY(()) tree fpscr_values;
8851 static void
8852 emit_fpu_switch (rtx scratch, int index)
8854 if (fpscr_values == NULL)
8856 tree t = build_index_type (integer_one_node);
8857 t = build_array_type (integer_type_node, t);
8858 t = build_decl (BUILTINS_LOCATION,
8859 VAR_DECL, get_identifier ("__fpscr_values"), t);
8860 DECL_ARTIFICIAL (t) = 1;
8861 DECL_IGNORED_P (t) = 1;
8862 DECL_EXTERNAL (t) = 1;
8863 TREE_STATIC (t) = 1;
8864 TREE_PUBLIC (t) = 1;
8865 TREE_USED (t) = 1;
8867 fpscr_values = t;
8870 rtx src = DECL_RTL (fpscr_values);
8871 if (!can_create_pseudo_p ())
8873 emit_move_insn (scratch, XEXP (src, 0));
8874 if (index != 0)
8875 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8876 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8878 else
8879 src = adjust_address (src, SImode, index * 4);
8881 emit_insn (gen_lds_fpscr (src));
8884 static rtx get_free_reg (HARD_REG_SET);
8886 /* This function returns a register to use to load the address to load
8887 the fpscr from. Currently it always returns r1 or r7, but when we are
8888 able to use pseudo registers after combine, or have a better mechanism
8889 for choosing a register, it should be done here. */
8890 /* REGS_LIVE is the liveness information for the point for which we
8891 need this allocation. In some bare-bones exit blocks, r1 is live at the
8892 start. We can even have all of r0..r3 being live:
8893 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8894 INSN before which new insns are placed with will clobber the register
8895 we return. If a basic block consists only of setting the return value
8896 register to a pseudo and using that register, the return value is not
8897 live before or after this block, yet we we'll insert our insns right in
8898 the middle. */
8899 static rtx
8900 get_free_reg (HARD_REG_SET regs_live)
8902 if (! TEST_HARD_REG_BIT (regs_live, 1))
8903 return gen_rtx_REG (Pmode, 1);
8905 /* Hard reg 1 is live; since this is a small register classes target,
8906 there shouldn't be anything but a jump before the function end. */
8907 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8908 return gen_rtx_REG (Pmode, 7);
8911 /* This function will set the fpscr from memory.
8912 MODE is the mode we are setting it to. */
8913 void
8914 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8916 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8917 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8919 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8920 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
/* Nonzero if character C separates logical lines for the assembler.
   Unless the macro is already defined elsewhere, only ';' does; STR is
   not looked at by this default.  */
#if !defined (IS_ASM_LOGICAL_LINE_SEPARATOR)
#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
#endif
8928 static bool
8929 sequence_insn_p (rtx_insn *insn)
8931 rtx_insn* prev = PREV_INSN (insn);
8932 if (prev == NULL)
8933 return false;
8935 rtx_insn* next = NEXT_INSN (prev);
8936 if (next == NULL)
8937 return false;
8939 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8943 sh_insn_length_adjustment (rtx_insn *insn)
8945 /* Instructions with unfilled delay slots take up an extra two bytes for
8946 the nop in the delay slot. */
8947 if (((NONJUMP_INSN_P (insn)
8948 && GET_CODE (PATTERN (insn)) != USE
8949 && GET_CODE (PATTERN (insn)) != CLOBBER)
8950 || CALL_P (insn) || JUMP_P (insn))
8951 && ! sequence_insn_p (insn)
8952 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8953 return 2;
8955 /* Increase the insn length of a cbranch without a delay slot insn to
8956 force a delay slot which will be stuffed with a nop. */
8957 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8958 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8959 && ! sequence_insn_p (insn))
8960 return 2;
8962 /* sh-dsp parallel processing insn take four bytes instead of two. */
8964 if (NONJUMP_INSN_P (insn))
8966 int sum = 0;
8967 rtx body = PATTERN (insn);
8968 const char *templ;
8969 char c;
8970 bool maybe_label = true;
8972 if (GET_CODE (body) == ASM_INPUT)
8973 templ = XSTR (body, 0);
8974 else if (asm_noperands (body) >= 0)
8975 templ
8976 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8977 else
8978 return 0;
8981 int ppi_adjust = 0;
8984 c = *templ++;
8985 while (c == ' ' || c == '\t');
8986 /* all sh-dsp parallel-processing insns start with p.
8987 The only non-ppi sh insn starting with p is pref.
8988 The only ppi starting with pr is prnd. */
8989 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8990 ppi_adjust = 2;
8991 /* The repeat pseudo-insn expands two three insns, a total of
8992 six bytes in size. */
8993 else if ((c == 'r' || c == 'R')
8994 && ! strncasecmp ("epeat", templ, 5))
8995 ppi_adjust = 4;
8996 while (c && c != '\n'
8997 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8999 /* If this is a label, it is obviously not a ppi insn. */
9000 if (c == ':' && maybe_label)
9002 ppi_adjust = 0;
9003 break;
9005 else if (c == '\'' || c == '"')
9006 maybe_label = false;
9007 c = *templ++;
9009 sum += ppi_adjust;
9010 maybe_label = c != ':';
9012 while (c);
9013 return sum;
9015 return 0;
9018 /* Return TRUE for a valid displacement for the REG+disp addressing
9019 with MODE. */
9020 bool
9021 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
9022 bool allow_zero)
9024 if (! CONST_INT_P (op))
9025 return false;
9028 const HOST_WIDE_INT offset = INTVAL (op);
9029 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9030 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9032 /* If the mode does not support any displacement always return false.
9033 Even though an index of '0' is actually always valid, it will cause
9034 troubles when e.g. a DFmode move is split into two SFmode moves,
9035 where one SFmode move will have index '0' and the other move will
9036 have index '4'. */
9037 if (!allow_zero && max_disp < 1)
9038 return false;
9040 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9044 /* Recognize an RTL expression that is a valid memory address for
9045 an instruction.
9046 The MODE argument is the machine mode for the MEM expression
9047 that wants to use this address.
9048 Allow REG
9049 REG+disp
9050 REG+r0
9051 REG++
9052 --REG
9054 GBR+disp */
9055 static bool
9056 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9058 if (REG_P (x) && REGNO (x) == GBR_REG)
9059 return true;
9061 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9062 return true;
9063 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9064 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9065 return true;
9066 else if (GET_CODE (x) == PLUS)
9068 rtx xop0 = XEXP (x, 0);
9069 rtx xop1 = XEXP (x, 1);
9071 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9072 return gbr_displacement (xop1, mode);
9074 if (GET_MODE_SIZE (mode) <= 8
9075 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9076 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9077 return true;
9079 if (GET_MODE_SIZE (mode) <= 4
9080 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9082 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9083 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9084 return true;
9085 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9086 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9087 return true;
9091 return false;
9094 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9095 isn't protected by a PIC unspec. */
9096 bool
9097 nonpic_symbol_mentioned_p (rtx x)
9099 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9100 || GET_CODE (x) == PC)
9101 return true;
9103 /* We don't want to look into the possible MEM location of a
9104 CONST_DOUBLE, since we're not going to use it, in general. */
9105 if (GET_CODE (x) == CONST_DOUBLE)
9106 return false;
9108 if (GET_CODE (x) == UNSPEC
9109 && (XINT (x, 1) == UNSPEC_PIC
9110 || XINT (x, 1) == UNSPEC_GOT
9111 || XINT (x, 1) == UNSPEC_GOTOFF
9112 || XINT (x, 1) == UNSPEC_GOTPLT
9113 || XINT (x, 1) == UNSPEC_GOTTPOFF
9114 || XINT (x, 1) == UNSPEC_DTPOFF
9115 || XINT (x, 1) == UNSPEC_TPOFF
9116 || XINT (x, 1) == UNSPEC_PLT
9117 || XINT (x, 1) == UNSPEC_PCREL
9118 || XINT (x, 1) == UNSPEC_SYMOFF
9119 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9120 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9121 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9122 return false;
9124 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9125 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9127 if (fmt[i] == 'E')
9129 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9130 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9131 return true;
9133 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9134 return true;
9137 return false;
9140 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9141 @GOTOFF in `reg'. */
9143 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9145 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9146 return orig;
9148 if (GET_CODE (orig) == LABEL_REF
9149 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9151 if (reg == NULL_RTX)
9152 reg = gen_reg_rtx (Pmode);
9154 if (TARGET_FDPIC
9155 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9157 /* Weak functions may be NULL which doesn't work with
9158 GOTOFFFUNCDESC because the runtime offset is not known. */
9159 if (SYMBOL_REF_WEAK (orig))
9160 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9161 else
9162 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9164 else if (TARGET_FDPIC
9165 && (GET_CODE (orig) == LABEL_REF
9166 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9167 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9168 || SYMBOL_REF_EXTERNAL_P (orig)
9169 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
9170 /* In FDPIC, GOTOFF can only be used for writable data. */
9171 emit_insn (gen_symGOT2reg (reg, orig));
9172 else
9173 emit_insn (gen_symGOTOFF2reg (reg, orig));
9174 return reg;
9176 else if (GET_CODE (orig) == SYMBOL_REF)
9178 if (reg == NULL_RTX)
9179 reg = gen_reg_rtx (Pmode);
9181 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9182 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9183 else
9184 emit_insn (gen_symGOT2reg (reg, orig));
9185 return reg;
9187 return orig;
9190 /* Given a (logical) mode size and an offset in bytes, try to find the
9191 appropriate displacement value for a mov insn. On SH the displacements
9192 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9193 15 bytes in QImode. To compensate this we create a new base address by
9194 adding an adjustment value to it.
9196 If the originally requested offset is greater than 127 we prefer using
9197 values 124..127 over 128..131 to increase opportunities to use the
9198 add #imm, Rn insn.
9200 In some cases it is possible that a requested offset might seem unaligned
9201 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9202 This is compensated by adjusting the base address so that the effective
9203 address of the displacement move insn will be aligned.
9205 This is not the best possible way of rebasing the base address, as it
9206 does not look at other present displacement addressings around it.
9207 In some cases this can create more base address adjustments than would
9208 actually be necessary. */
9209 struct disp_adjust
9211 rtx offset_adjust;
9212 rtx mov_disp;
9215 static struct disp_adjust
9216 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9218 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9220 /* Do not try to use SH2A's large displacements here, because this would
9221 effectively disable the small displacement insns. */
9222 const int mode_sz = GET_MODE_SIZE (mode);
9223 const int mov_insn_sz = mov_insn_size (mode, false);
9224 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9225 const int max_disp_next = max_disp + mov_insn_sz;
9226 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9227 HOST_WIDE_INT offset_adjust;
9229 /* In some cases this actually does happen and we must check for it. */
9230 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9231 return res;
9233 /* Keeps the previous behavior for QImode displacement addressing.
9234 This just decides how the offset is re-based. Removing this special
9235 case will result in slightly bigger code on average, but it's not that
9236 bad actually. */
9237 if (mov_insn_sz == 1)
9238 align_modifier = 0;
9240 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9242 if (mode_sz + offset - offset_adjust <= max_disp_next)
9244 res.offset_adjust = GEN_INT (offset_adjust);
9245 res.mov_disp = GEN_INT (offset - offset_adjust);
9248 return res;
9251 /* Try to modify an illegitimate address and make it legitimate.
9252 If we find one, return the new, valid address.
9253 Otherwise, return the original address. */
9254 static rtx
9255 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9257 if (flag_pic)
9258 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9260 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9261 || (TARGET_SH2E && mode == SFmode))
9262 return x;
9264 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9265 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9267 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9268 INTVAL (XEXP (x, 1)));
9270 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9272 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9273 adj.offset_adjust, NULL_RTX, 0,
9274 OPTAB_LIB_WIDEN);
9275 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9278 return x;
9281 /* Attempt to replace *p, which is an address that needs reloading, with
9282 a valid memory address for an operand of mode MODE.
9283 Like for sh_legitimize_address, for the SH we try to get a normal form
9284 of the address. That will allow inheritance of the address reloads. */
9285 bool
9286 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9287 int itype)
9289 enum reload_type type = (enum reload_type) itype;
9290 const int mode_sz = GET_MODE_SIZE (mode);
9292 if (sh_lra_p ())
9293 return false;
9295 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9296 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9298 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9299 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9301 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9303 push_reload (*p, NULL_RTX, p, NULL,
9304 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9305 return true;
9308 if (TARGET_SH2E && mode == SFmode)
9310 *p = copy_rtx (*p);
9311 push_reload (*p, NULL_RTX, p, NULL,
9312 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9313 return true;
9316 /* FIXME: Do not allow to legitimize QImode and HImode displacement
9317 moves because then reload has a problem figuring the constraint
9318 that the move insn target/source reg must be R0.
9319 Or maybe some handling is wrong in sh_secondary_reload for this
9320 to work properly? */
9321 if ((mode_sz == 4 || mode_sz == 8)
9322 && ! (TARGET_SH4 && mode == DFmode)
9323 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9325 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9326 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9327 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9328 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9329 return true;
9333 /* We must re-recognize what we created before. */
9334 if (GET_CODE (*p) == PLUS
9335 && (mode_sz == 4 || mode_sz == 8)
9336 && GET_CODE (XEXP (*p, 0)) == PLUS
9337 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9338 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9339 && CONST_INT_P (XEXP (*p, 1))
9340 && ! (TARGET_SH2E && mode == SFmode))
9342 /* Because this address is so complex, we know it must have
9343 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9344 it is already unshared, and needs no further unsharing. */
9345 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9346 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9347 return true;
9350 return false;
9353 /* In the name of slightly smaller debug output, and to cater to
9354 general assembler lossage, recognize various UNSPEC sequences
9355 and turn them back into a direct symbol reference. */
9356 static rtx
9357 sh_delegitimize_address (rtx orig_x)
9359 orig_x = delegitimize_mem_from_attrs (orig_x);
9361 rtx x = orig_x;
9362 if (MEM_P (x))
9363 x = XEXP (x, 0);
9364 if (GET_CODE (x) == CONST)
9366 rtx y = XEXP (x, 0);
9367 if (GET_CODE (y) == UNSPEC)
9369 if (XINT (y, 1) == UNSPEC_GOT
9370 || XINT (y, 1) == UNSPEC_GOTOFF
9371 || XINT (y, 1) == UNSPEC_SYMOFF)
9372 return XVECEXP (y, 0, 0);
9373 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9375 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9377 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9379 if (GET_CODE (symplt) == UNSPEC
9380 && (XINT (symplt, 1) == UNSPEC_PLT
9381 || XINT (symplt, 1) == UNSPEC_PCREL))
9382 return XVECEXP (symplt, 0, 0);
9388 return orig_x;
9391 /* Mark the use of a constant in the literal table. If the constant
9392 has multiple labels, make it unique. */
9393 static rtx
9394 mark_constant_pool_use (rtx x)
9396 if (x == NULL_RTX)
9397 return x;
9399 switch (GET_CODE (x))
9401 case LABEL_REF:
9402 x = XEXP (x, 0);
9403 case CODE_LABEL:
9404 break;
9405 default:
9406 return x;
9409 /* Get the first label in the list of labels for the same constant
9410 and delete another labels in the list. */
9411 rtx_insn* lab = as_a <rtx_insn*> (x);
9412 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9414 if (!LABEL_P (insn)
9415 || LABEL_REFS (insn) != NEXT_INSN (insn))
9416 break;
9417 lab = insn;
9420 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9421 as_a<rtx_insn *> (insn)->set_deleted ();
9423 /* Mark constants in a window. */
9424 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9425 insn = NEXT_INSN (insn))
9427 if (!NONJUMP_INSN_P (insn))
9428 continue;
9430 rtx pattern = PATTERN (insn);
9431 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9432 continue;
9434 switch (XINT (pattern, 1))
9436 case UNSPECV_CONST2:
9437 case UNSPECV_CONST4:
9438 case UNSPECV_CONST8:
9439 XVECEXP (pattern, 0, 1) = const1_rtx;
9440 break;
9441 case UNSPECV_WINDOW_END:
9442 if (XVECEXP (pattern, 0, 0) == x)
9443 return lab;
9444 break;
9445 case UNSPECV_CONST_END:
9446 return lab;
9447 default:
9448 break;
9452 return lab;
9455 /* Return true if it's possible to redirect BRANCH1 to the destination
9456 of an unconditional jump BRANCH2. We only want to do this if the
9457 resulting branch will have a short displacement. */
9458 static bool
9459 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9461 /* Don't follow if BRANCH2 is possible to be a jump crossing between
9462 hot and cold partitions. */
9463 if (flag_reorder_blocks_and_partition
9464 && simplejump_p (branch2)
9465 && CROSSING_JUMP_P (branch2))
9466 return false;
9468 if (flag_expensive_optimizations && simplejump_p (branch2))
9470 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9471 rtx_insn *insn;
9472 int distance;
9474 for (distance = 0, insn = NEXT_INSN (branch1);
9475 insn && distance < 256;
9476 insn = PREV_INSN (insn))
9478 if (insn == dest)
9479 return true;
9480 else
9481 distance += get_attr_length (insn);
9483 for (distance = 0, insn = NEXT_INSN (branch1);
9484 insn && distance < 256;
9485 insn = NEXT_INSN (insn))
9487 if (insn == dest)
9488 return true;
9489 else
9490 distance += get_attr_length (insn);
9493 return false;
9496 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9497 bool
9498 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9499 unsigned int new_reg)
9501 /* Interrupt functions can only use registers that have already been
9502 saved by the prologue, even if they would normally be
9503 call-clobbered. */
9504 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9505 return false;
9507 return true;
9510 /* Function to update the integer COST
9511 based on the relationship between INSN that is dependent on
9512 DEP_INSN through the dependence LINK. The default is to make no
9513 adjustment to COST. This can be used for example to specify to
9514 the scheduler that an output- or anti-dependence does not incur
9515 the same cost as a data-dependence. The return value should be
9516 the new value for COST. */
9517 static int
9518 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9519 unsigned int)
9521 rtx reg, use_pat;
9523 if (dep_type == 0)
9525 if (recog_memoized (insn) < 0
9526 || recog_memoized (dep_insn) < 0)
9527 return cost;
9529 rtx dep_set = single_set (dep_insn);
9531 /* The latency that we specify in the scheduling description refers
9532 to the actual output, not to an auto-increment register; for that,
9533 the latency is one. */
9534 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9536 rtx set = single_set (insn);
9538 if (set
9539 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9540 && (!MEM_P (SET_DEST (set))
9541 || !reg_mentioned_p (SET_DEST (dep_set),
9542 XEXP (SET_DEST (set), 0))))
9543 cost = 1;
9545 /* The only input for a call that is timing-critical is the
9546 function's address. */
9547 if (CALL_P (insn))
9549 rtx call = get_call_rtx_from (insn);
9550 if (call
9551 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9552 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9553 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9554 cost -= TARGET_SH4_300 ? 3 : 6;
9556 /* Likewise, the most timing critical input for an sfuncs call
9557 is the function address. However, sfuncs typically start
9558 using their arguments pretty quickly.
9559 Assume a four cycle delay for SH4 before they are needed.
9560 Cached ST40-300 calls are quicker, so assume only a one
9561 cycle delay there.
9562 ??? Maybe we should encode the delays till input registers
9563 are needed by sfuncs into the sfunc call insn. */
9564 /* All sfunc calls are parallels with at least four components.
9565 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9566 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9567 && XVECLEN (PATTERN (insn), 0) >= 4
9568 && (reg = sfunc_uses_reg (insn)))
9570 if (! reg_set_p (reg, dep_insn))
9571 cost -= TARGET_SH4_300 ? 1 : 4;
9573 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9575 attr_type dep_type = get_attr_type (dep_insn);
9576 attr_type type;
9577 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9578 cost--;
9579 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9580 && (type = get_attr_type (insn)) != TYPE_CALL
9581 && type != TYPE_SFUNC)
9582 cost--;
9583 /* When the preceding instruction loads the shift amount of
9584 the following SHAD/SHLD, the latency of the load is increased
9585 by 1 cycle. */
9586 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9587 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9588 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9589 XEXP (SET_SRC (single_set (insn)),
9590 1)))
9591 cost++;
9592 /* When an LS group instruction with a latency of less than
9593 3 cycles is followed by a double-precision floating-point
9594 instruction, FIPR, or FTRV, the latency of the first
9595 instruction is increased to 3 cycles. */
9596 else if (cost < 3
9597 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9598 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9599 cost = 3;
9600 /* The lsw register of a double-precision computation is ready one
9601 cycle earlier. */
9602 else if (reload_completed
9603 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9604 && (use_pat = single_set (insn))
9605 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9606 SET_SRC (use_pat)))
9607 cost -= 1;
9609 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9610 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9611 cost -= 1;
9613 else if (TARGET_SH4_300)
9615 /* Stores need their input register two cycles later. */
9616 attr_type type;
9617 if (dep_set && cost >= 1
9618 && ((type = get_attr_type (insn)) == TYPE_STORE
9619 || type == TYPE_PSTORE
9620 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9622 rtx set = single_set (insn);
9624 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9625 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9627 cost -= 2;
9628 /* But don't reduce the cost below 1 if the address depends
9629 on a side effect of dep_insn. */
9630 if (cost < 1
9631 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9632 cost = 1;
9637 /* An anti-dependence penalty of two applies if the first insn is a double
9638 precision fadd / fsub / fmul. */
9639 else if (!TARGET_SH4_300
9640 && dep_type == REG_DEP_ANTI
9641 && recog_memoized (dep_insn) >= 0
9642 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9643 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9644 /* A lot of alleged anti-flow dependences are fake,
9645 so check this one is real. */
9646 && flow_dependent_p (dep_insn, insn))
9647 cost = 2;
9649 return cost;
9652 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9653 if DEP_INSN is anti-flow dependent on INSN. */
9654 static bool
9655 flow_dependent_p (rtx insn, rtx dep_insn)
9657 rtx tmp = PATTERN (insn);
9659 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9660 return tmp == NULL_RTX;
9663 /* A helper function for flow_dependent_p called through note_stores. */
9664 static void
9665 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9667 rtx * pinsn = (rtx *) data;
9669 if (*pinsn && reg_referenced_p (x, *pinsn))
9670 *pinsn = NULL_RTX;
9673 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9674 'special function' patterns (type sfunc) that clobber pr, but that
9675 do not look like function calls to leaf_function_p. Hence we must
9676 do this extra check. */
9677 static int
9678 sh_pr_n_sets (void)
9680 return DF_REG_DEF_COUNT (PR_REG);
9683 /* Return where to allocate pseudo for a given hard register initial
9684 value. */
9685 static rtx
9686 sh_allocate_initial_value (rtx hard_reg)
9688 if (REGNO (hard_reg) == PR_REG)
9690 if (crtl->is_leaf && ! sh_pr_n_sets ())
9691 return hard_reg;
9692 else
9693 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9696 return NULL_RTX;
9699 /* This function returns "2" to indicate dual issue for the SH4
9700 processor. To be used by the DFA pipeline description. */
9701 static int
9702 sh_issue_rate (void)
9704 if (TARGET_SUPERSCALAR)
9705 return 2;
9706 else
9707 return 1;
9710 /* Functions for ready queue reordering for sched1. */
9712 /* Get weight for mode for a set x. */
9713 static short
9714 find_set_regmode_weight (rtx x, machine_mode mode)
9716 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9717 return 1;
9718 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9720 if (REG_P (SET_DEST (x)))
9722 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9723 return 1;
9724 else
9725 return 0;
9727 return 1;
9729 return 0;
9732 /* Get regmode weight for insn. */
9733 static short
9734 find_insn_regmode_weight (rtx insn, machine_mode mode)
9736 /* Increment weight for each register born here. */
9737 rtx x = PATTERN (insn);
9738 short reg_weight = find_set_regmode_weight (x, mode);
9739 if (GET_CODE (x) == PARALLEL)
9741 int j;
9742 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9744 x = XVECEXP (PATTERN (insn), 0, j);
9745 reg_weight += find_set_regmode_weight (x, mode);
9748 /* Decrement weight for each register that dies here. */
9749 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9751 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9753 rtx note = XEXP (x, 0);
9754 if (REG_P (note) && GET_MODE (note) == mode)
9755 reg_weight--;
9758 return reg_weight;
9761 /* Calculate regmode weights for all insns of a basic block. */
9762 static void
9763 find_regmode_weight (basic_block b, machine_mode mode)
9765 rtx_insn *insn, *next_tail, *head, *tail;
9767 get_ebb_head_tail (b, b, &head, &tail);
9768 next_tail = NEXT_INSN (tail);
9770 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9772 /* Handle register life information. */
9773 if (!INSN_P (insn))
9774 continue;
9776 if (mode == SFmode)
9777 INSN_REGMODE_WEIGHT (insn, mode) =
9778 find_insn_regmode_weight (insn, mode)
9779 + 2 * find_insn_regmode_weight (insn, DFmode);
9780 else if (mode == SImode)
9781 INSN_REGMODE_WEIGHT (insn, mode) =
9782 find_insn_regmode_weight (insn, mode)
9783 + 2 * find_insn_regmode_weight (insn, DImode);
9787 /* Comparison function for ready queue sorting. */
9788 static int
9789 rank_for_reorder (const void *x, const void *y)
9791 rtx_insn *tmp = *(rtx_insn * const *) y;
9792 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9794 /* The insn in a schedule group should be issued the first. */
9795 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9796 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9798 /* If insns are equally good, sort by INSN_LUID (original insn order), This
9799 minimizes instruction movement, thus minimizing sched's effect on
9800 register pressure. */
9801 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9804 /* Resort the array A in which only element at index N may be out of order. */
9805 static void
9806 swap_reorder (rtx_insn **a, int n)
9808 rtx_insn *insn = a[n - 1];
9809 int i = n - 2;
9811 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9813 a[i + 1] = a[i];
9814 i -= 1;
9816 a[i + 1] = insn;
9819 /* Sort the ready list by ascending priority. */
9820 static void
9821 ready_reorder (rtx_insn **ready, int nready)
9823 if (nready == 2)
9824 swap_reorder (ready, nready);
9825 else if (nready > 2)
9826 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9829 /* Count life regions of r0 for a block. */
9830 static int
9831 find_r0_life_regions (basic_block b)
9833 bool live;
9834 int set;
9835 int death = 0;
9837 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9839 set = 1;
9840 live = true;
9842 else
9844 set = 0;
9845 live = false;
9848 rtx_insn* insn = BB_HEAD (b);
9849 rtx_insn* end = BB_END (b);
9850 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9851 while (1)
9853 if (INSN_P (insn))
9855 if (find_regno_note (insn, REG_DEAD, R0_REG))
9857 death++;
9858 live = false;
9861 rtx pset;
9862 if (!live
9863 && (pset = single_set (insn))
9864 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9865 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9867 set++;
9868 live = true;
9871 if (insn == end)
9872 break;
9873 insn = NEXT_INSN (insn);
9875 return set - death;
9878 /* Calculate regmode weights for all insns of all basic block. */
9879 static void
9880 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9881 int verbose ATTRIBUTE_UNUSED,
9882 int old_max_uid)
9884 basic_block b;
9886 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9887 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9888 r0_life_regions = 0;
9890 FOR_EACH_BB_REVERSE_FN (b, cfun)
9892 find_regmode_weight (b, SImode);
9893 find_regmode_weight (b, SFmode);
9894 if (!reload_completed)
9895 r0_life_regions += find_r0_life_regions (b);
9898 CURR_REGMODE_PRESSURE (SImode) = 0;
9899 CURR_REGMODE_PRESSURE (SFmode) = 0;
9902 /* Cleanup. */
9903 static void
9904 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9905 int verbose ATTRIBUTE_UNUSED)
9907 if (regmode_weight[0])
9909 free (regmode_weight[0]);
9910 regmode_weight[0] = NULL;
9912 if (regmode_weight[1])
9914 free (regmode_weight[1]);
9915 regmode_weight[1] = NULL;
9919 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9920 keep count of register pressures on SImode and SFmode. */
9921 static int
9922 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9923 int sched_verbose ATTRIBUTE_UNUSED,
9924 rtx_insn *insn,
9925 int can_issue_more)
9927 if (GET_CODE (PATTERN (insn)) != USE
9928 && GET_CODE (PATTERN (insn)) != CLOBBER)
9929 cached_can_issue_more = can_issue_more - 1;
9930 else
9931 cached_can_issue_more = can_issue_more;
9933 if (reload_completed)
9934 return cached_can_issue_more;
9936 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9937 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9939 return cached_can_issue_more;
/* Reset the running SImode and SFmode register pressure counters.
   The DUMP, VERBOSE and VECLEN arguments are not used.  */
9942 static void
9943 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9944 int verbose ATTRIBUTE_UNUSED,
9945 int veclen ATTRIBUTE_UNUSED)
9947 CURR_REGMODE_PRESSURE (SImode) = 0;
9948 CURR_REGMODE_PRESSURE (SFmode) = 0;
9951 /* Some magic numbers. */
9952 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9953 functions that already have high pressure on r0. high_pressure reports
   high pressure once r0_life_regions reaches this value. */
9954 #define R0_MAX_LIFE_REGIONS 2
9955 /* Register pressure thresholds for SImode and SFmode registers;
   high_pressure reports high pressure once the current pressure for the
   mode exceeds the threshold. */
9956 #define SIMODE_MAX_WEIGHT 5
9957 #define SFMODE_MAX_WEIGHT 10
9959 /* Return true if the pressure is high for MODE. */
9960 static bool
9961 high_pressure (machine_mode mode)
9963 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9964 functions that already have high pressure on r0. */
9965 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9966 return true;
9968 if (mode == SFmode)
9969 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9970 else
9971 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9974 /* Reorder ready queue if register pressure is high. */
9975 static int
9976 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9977 int sched_verbose ATTRIBUTE_UNUSED,
9978 rtx_insn **ready,
9979 int *n_readyp,
9980 int clock_var ATTRIBUTE_UNUSED)
9982 if (reload_completed)
9983 return sh_issue_rate ();
9985 if (high_pressure (SFmode) || high_pressure (SImode))
9987 ready_reorder (ready, *n_readyp);
9990 return sh_issue_rate ();
9993 /* Skip cycles if the current register pressure is high. */
9994 static int
9995 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9996 int sched_verbose ATTRIBUTE_UNUSED,
9997 rtx_insn **ready ATTRIBUTE_UNUSED,
9998 int *n_readyp ATTRIBUTE_UNUSED,
9999 int clock_var ATTRIBUTE_UNUSED)
10001 if (reload_completed)
10002 return cached_can_issue_more;
10004 if (high_pressure(SFmode) || high_pressure (SImode))
10005 skip_cycles = 1;
10007 return cached_can_issue_more;
10010 /* Skip cycles without sorting the ready queue. This will move insn from
10011 Q->R. If this is the last cycle we are skipping; allow sorting of ready
10012 queue by sh_reorder. */
10014 /* Generally, skipping these many cycles are sufficient for all insns to move
10015 from Q -> R. */
10016 #define MAX_SKIPS 8
10018 static int
10019 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10020 int sched_verbose ATTRIBUTE_UNUSED,
10021 rtx_insn *insn ATTRIBUTE_UNUSED,
10022 int last_clock_var,
10023 int clock_var,
10024 int *sort_p)
10026 if (reload_completed)
10027 return 0;
10029 if (skip_cycles)
10031 if ((clock_var - last_clock_var) < MAX_SKIPS)
10033 *sort_p = 0;
10034 return 1;
10036 /* If this is the last cycle we are skipping, allow reordering of R. */
10037 if ((clock_var - last_clock_var) == MAX_SKIPS)
10039 *sort_p = 1;
10040 return 1;
10044 skip_cycles = 0;
10046 return 0;
10049 static bool
10050 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10052 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10056 /* On the SH1..SH4, the trampoline looks like
10057 2 0002 D202 mov.l l2,r2
10058 1 0000 D301 mov.l l1,r3
10059 3 0004 422B jmp @r2
10060 4 0006 0009 nop
10061 5 0008 00000000 l1: .long area
10062 6 000c 00000000 l2: .long function
10064 FDPIC needs a form that includes a function descriptor and
10065 code to load the GOT register:
10066 0 0000 00000000 .long l0
10067 1 0004 00000000 .long gotval
10068 2 0008 D302 l0: mov.l l1,r3
10069 3 000a D203 mov.l l2,r2
10070 4 000c 6122 mov.l @r2,r1
10071 5 000e 5C21 mov.l @(4,r2),r12
10072 6 0010 412B jmp @r1
10073 7 0012 0009 nop
10074 8 0014 00000000 l1: .long area
10075 9 0018 00000000 l2: .long function
10077 SH5 (compact) uses r1 instead of r3 for the static chain. */
10079 /* Emit insns to store a value at memory address + offset. */
10080 static void
10081 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10083 gcc_assert ((offset & 3) == 0);
10084 emit_move_insn (offset == 0
10085 ? change_address (addr, SImode, NULL_RTX)
10086 : adjust_address (addr, SImode, offset), value);
10089 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10090 static void
10091 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10093 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10094 ? (w0 | (w1 << 16))
10095 : (w1 | (w0 << 16)), SImode));
10098 /* Emit RTL insns to initialize the variable parts of a trampoline.
10099 FNADDR is an RTX for the address of the function's pure code.
10100 CXT is an RTX for the static chain value for the function. */
10101 static void
10102 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10104 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10105 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10107 if (TARGET_FDPIC)
10109 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10111 sh_emit_storesi (tramp_mem, 0, a);
10112 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10114 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10115 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10116 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10118 sh_emit_storesi (tramp_mem, 20, cxt);
10119 sh_emit_storesi (tramp_mem, 24, fnaddr);
10121 else
10123 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10124 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10126 sh_emit_storesi (tramp_mem, 8, cxt);
10127 sh_emit_storesi (tramp_mem, 12, fnaddr);
10129 if (TARGET_HARD_SH4)
10131 if (!TARGET_INLINE_IC_INVALIDATE
10132 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10133 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10134 FUNCTION_ORDINARY).sym,
10135 LCT_NORMAL, VOIDmode, tramp, SImode);
10136 else
10137 emit_insn (gen_ic_invalidate_line (tramp));
10141 /* Return the address to use for calling the trampoline at TRAMP; the
   address is returned unchanged.  NOTE(review): this comment used to
   describe adding 1 to the address for SH5 / SHmedia trampolines, which the
   code does not do. */
10142 static rtx
10143 sh_trampoline_adjust_address (rtx tramp)
10145 return tramp;
10148 /* If PIC, we cannot make sibling calls to global functions
10149 because the PLT requires r12 to be live. */
10150 static bool
10151 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10153 return (1
10154 && ! sh_cfun_interrupt_handler_p ()
10155 && (! flag_pic || TARGET_FDPIC
10156 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10157 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10160 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10161 void
10162 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10164 const_tree decl = SYMBOL_REF_DECL (sym);
10165 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10167 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10168 emit_insn (gen_sym_label2reg (reg, sym, lab));
10169 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10170 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10171 else
10172 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10175 /* Machine specific built-in functions. */
/* Describes one built-in function; the entries live in the bdesc table. */
10177 struct builtin_description
/* Predicate telling whether the built-in is available; entries for which it
   returns false are skipped by sh_init_builtins. */
10179 bool (* const is_enabled) (void);
/* Insn code used to look up the operand modes in insn_data. */
10180 const enum insn_code icode;
/* Name under which the built-in is registered. */
10181 const char *const name;
/* Index into signature_args. */
10182 int signature;
/* Function decl, filled in by sh_init_builtins. */
10183 tree fndecl;
10186 /* Enable predicate for built-ins: returns TARGET_SH1. It is used as the
10187 is_enabled hook of the entries in the bdesc table. */
10188 static bool
10189 sh1_builtin_p (void)
10191 return TARGET_SH1;
10194 /* Describe number and signedness of arguments; arg[0] == result.
10195 Each entry is one of: 1: unsigned, 2: signed, 4: don't care, 8: pointer,
   0: no argument (or, for arg[0], no result).
   sh_init_builtins treats any value with bit 8 set as a pointer and passes
   bit 1 as the unsigned flag when looking up the type for a mode. */
10196 /* 9: 64-bit pointer, 10: 32-bit pointer */
10197 static const char signature_args[][4] =
10199 #define SH_BLTIN_V2SI2 0
10200 { 4, 4 },
10201 #define SH_BLTIN_V4HI2 1
10202 { 4, 4 },
10203 #define SH_BLTIN_V2SI3 2
10204 { 4, 4, 4 },
10205 #define SH_BLTIN_V4HI3 3
10206 { 4, 4, 4 },
10207 #define SH_BLTIN_V8QI3 4
10208 { 4, 4, 4 },
10209 #define SH_BLTIN_MAC_HISI 5
10210 { 1, 4, 4, 1 },
10211 #define SH_BLTIN_SH_HI 6
10212 { 4, 4, 1 },
10213 #define SH_BLTIN_SH_SI 7
10214 { 4, 4, 1 },
10215 #define SH_BLTIN_V4HI2V2SI 8
10216 { 4, 4, 4 },
10217 #define SH_BLTIN_V4HI2V8QI 9
10218 { 4, 4, 4 },
10219 #define SH_BLTIN_SISF 10
10220 { 4, 2 },
10221 #define SH_BLTIN_LDUA_L 11
10222 { 2, 10 },
10223 #define SH_BLTIN_LDUA_Q 12
10224 { 1, 10 },
10225 #define SH_BLTIN_STUA_L 13
10226 { 0, 10, 2 },
10227 #define SH_BLTIN_STUA_Q 14
10228 { 0, 10, 1 },
10229 #define SH_BLTIN_LDUA_L64 15
10230 { 2, 9 },
10231 #define SH_BLTIN_LDUA_Q64 16
10232 { 1, 9 },
10233 #define SH_BLTIN_STUA_L64 17
10234 { 0, 9, 2 },
10235 #define SH_BLTIN_STUA_Q64 18
10236 { 0, 9, 1 },
/* Signatures with an index below this value have their function type cached
   and reused by sh_init_builtins. */
10237 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10238 #define SH_BLTIN_2 19
10239 #define SH_BLTIN_SU 19
10240 { 1, 2 },
10241 #define SH_BLTIN_3 20
10242 #define SH_BLTIN_SUS 20
10243 { 2, 2, 1 },
10244 #define SH_BLTIN_PSSV 21
10245 { 0, 8, 2, 2 },
10246 #define SH_BLTIN_XXUU 22
10247 #define SH_BLTIN_UUUU 22
10248 { 1, 1, 1, 1 },
10249 #define SH_BLTIN_PV 23
10250 { 0, 8 },
10251 #define SH_BLTIN_VP 24
10252 { 8, 0 },
10253 #define SH_BLTIN_UV 25
10254 { 1, 0 },
10255 #define SH_BLTIN_VU 26
10256 { 0, 1 },
10258 /* mcmv: operands considered unsigned. */
10259 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10260 /* mperm: control value considered unsigned int. */
10261 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10262 /* mshards_q: returns signed short. */
10263 /* nsb: takes long long arg, returns unsigned char. */
/* NOTE(review): none of the built-ins named in the notes above has an entry
   in the table below; the notes look like leftovers -- confirm.  */
/* Table of the machine specific built-ins. Each entry gives the enable
   predicate, the insn code, the built-in's name, the index into
   signature_args and the fndecl slot, which starts out as 0 and is filled
   in by sh_init_builtins. */
10264 static struct builtin_description bdesc[] =
10266 { sh1_builtin_p,
10267 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10268 { sh1_builtin_p,
10269 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
10272 static tree sh_builtin_get_fpscr;
10273 static tree sh_builtin_set_fpscr;
10275 static void
10276 sh_init_builtins (void)
10278 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10279 memset (shared, 0, sizeof shared);
10281 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10283 builtin_description* d = &bdesc[di];
10285 if (!d->is_enabled ())
10286 continue;
10288 tree type, arg_type = NULL_TREE;
10289 int signature = d->signature;
10291 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10292 type = shared[signature];
10293 else
10295 int has_result = signature_args[signature][0] != 0;
10296 tree args[3];
10298 if (! TARGET_FPU_ANY
10299 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10300 continue;
10301 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10302 args[i] = NULL_TREE;
10303 for (int i = 3; ; i--)
10305 int arg = signature_args[signature][i];
10306 int opno = i - 1 + has_result;
10308 if (arg & 8)
10309 arg_type = ptr_type_node;
10310 else if (arg)
10311 arg_type = (*lang_hooks.types.type_for_mode)
10312 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10313 else if (i)
10314 continue;
10315 else
10316 arg_type = void_type_node;
10317 if (i == 0)
10318 break;
10319 args[i-1] = arg_type;
10321 type = build_function_type_list (arg_type, args[0], args[1],
10322 args[2], NULL_TREE);
10323 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10324 shared[signature] = type;
10326 d->fndecl =
10327 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10328 NULL, NULL_TREE);
10329 /* Recode {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10330 if (d->icode == CODE_FOR_sts_fpscr)
10331 sh_builtin_get_fpscr = d->fndecl;
10332 else if (d->icode == CODE_FOR_set_fpscr)
10333 sh_builtin_set_fpscr = d->fndecl;
10337 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV.
   Builds three trees around the FPSCR built-ins recorded by
   sh_init_builtins and stores them in *HOLD, *CLEAR and *UPDATE.
   Nothing is stored when there is no FPU. */
10339 static void
10340 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
/* Exception bits; the mask built below clears them both as given and shifted
   left by SH_FE_EXCEPT_SHIFT.  NOTE(review): presumably the FPSCR flag and
   enable fields -- confirm against the hardware manual. */
10342 const unsigned SH_FE_INVALID = 64;
10343 const unsigned SH_FE_DIVBYZERO = 32;
10344 const unsigned SH_FE_OVERFLOW = 16;
10345 const unsigned SH_FE_UNDERFLOW = 8;
10346 const unsigned SH_FE_INEXACT = 4;
10347 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10348 | SH_FE_DIVBYZERO
10349 | SH_FE_OVERFLOW
10350 | SH_FE_UNDERFLOW
10351 | SH_FE_INEXACT);
10352 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
10353 tree fenv_var, mask, ld_fenv, masked_fenv;
10354 tree new_fenv_var, reload_fenv, restore_fnenv;
10355 tree update_call, atomic_feraiseexcept, hold_fnclex;
10357 if (! TARGET_FPU_ANY)
10358 return;
10360 /* Generate the equivalent of :
10361 unsigned int fenv_var;
10362 fenv_var = __builtin_sh_get_fpscr ();
10364 unsigned int masked_fenv;
10365 masked_fenv = fenv_var & mask;
10367 __builtin_sh_set_fpscr (masked_fenv); */
10369 fenv_var = create_tmp_var_raw (unsigned_type_node);
10370 mask = build_int_cst (unsigned_type_node,
10371 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10372 | SH_FE_ALL_EXCEPT));
10373 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10374 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10375 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10376 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
/* From here on fenv_var names the TARGET_EXPR wrapping the temporary, not
   the raw temporary used in the expressions built above. */
10377 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10378 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10379 ld_fenv),
10380 NULL_TREE, NULL_TREE);
10381 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10383 /* Store the value of masked_fenv to clear the exceptions:
10384 __builtin_sh_set_fpscr (masked_fenv); */
10386 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10388 /* Generate the equivalent of :
10389 unsigned int new_fenv_var;
10390 new_fenv_var = __builtin_sh_get_fpscr ();
10392 __builtin_sh_set_fpscr (fenv_var);
10394 __atomic_feraiseexcept (new_fenv_var); */
10396 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10397 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10398 build_call_expr (sh_builtin_get_fpscr, 0));
10399 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10400 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10401 update_call = build_call_expr (atomic_feraiseexcept, 1,
10402 fold_convert (integer_type_node,
10403 new_fenv_var));
10404 *update = build2 (COMPOUND_EXPR, void_type_node,
10405 build2 (COMPOUND_EXPR, void_type_node,
10406 reload_fenv, restore_fnenv), update_call);
10409 /* Implements target hook vector_mode_supported_p. No vector mode is
   supported: the result is false for every MODE. */
10410 bool
10411 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10413 return false;
10416 bool
10417 sh_frame_pointer_required (void)
10419 /* If needed override this in other tm.h files to cope with various OS
10420 lossage requiring a frame pointer. */
10421 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10422 return true;
10424 if (crtl->profile)
10425 return true;
10427 return false;
10430 /* Implements target hook dwarf_calling_convention. Return an enum
10431 of dwarf_calling_convention. */
10433 sh_dwarf_calling_convention (const_tree func)
10435 if (sh_attr_renesas_p (func))
10436 return DW_CC_GNU_renesas_sh;
10438 return DW_CC_normal;
10441 /* Returns the sh builtin decl for CODE. */
10442 static tree
10443 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10445 if (code >= ARRAY_SIZE (bdesc))
10446 return error_mark_node;
10448 if (!bdesc[code].is_enabled ())
10449 return error_mark_node;
10451 return bdesc[code].fndecl;
10454 /* Expand an expression EXP that calls a built-in function,
10455 with result going to TARGET if that's convenient
10456 (and in mode MODE if that's convenient).
10457 SUBTARGET may be used as the target for computing one of EXP's operands.
10458 IGNORE is nonzero if the value is to be ignored. */
10459 static rtx
10460 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10461 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10463 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10464 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10465 const struct builtin_description *d = &bdesc[fcode];
10466 enum insn_code icode = d->icode;
10467 int signature = d->signature;
10468 int nop = 0;
10469 rtx op[4];
10471 if (signature_args[signature][0])
10473 if (ignore)
10474 return NULL_RTX;
10476 machine_mode tmode = insn_data[icode].operand[0].mode;
10477 if (! target || GET_MODE (target) != tmode
10478 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10479 target = gen_reg_rtx (tmode);
10480 op[nop++] = target;
10482 else
10483 target = NULL_RTX;
10485 for (int i = 1; i <= 3; i++, nop++)
10487 if (! signature_args[signature][i])
10488 break;
10489 tree arg = CALL_EXPR_ARG (exp, i - 1);
10490 if (arg == error_mark_node)
10491 return const0_rtx;
10493 machine_mode opmode;
10494 tree optype;
10495 if (signature_args[signature][i] & 8)
10497 opmode = ptr_mode;
10498 optype = ptr_type_node;
10500 else
10502 opmode = insn_data[icode].operand[nop].mode;
10503 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10506 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10507 if (argmode != opmode)
10508 arg = build1 (NOP_EXPR, optype, arg);
10509 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10510 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10511 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10514 rtx pat = NULL_RTX;
10516 switch (nop)
10518 case 1:
10519 pat = (*insn_data[d->icode].genfun) (op[0]);
10520 break;
10521 case 2:
10522 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10523 break;
10524 case 3:
10525 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10526 break;
10527 case 4:
10528 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10529 break;
10530 default:
10531 gcc_unreachable ();
10533 if (! pat)
10534 return NULL_RTX;
10535 emit_insn (pat);
10536 return target;
10539 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10540 UNITS_PER_WORD bits wide. */
10542 static unsigned int
10543 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10545 if (XD_REGISTER_P (regno))
10546 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10547 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
10550 /* Implement TARGET_HARD_REGNO_MODE_OK.
10552 We can allow any mode in any general register. The special registers
10553 only allow SImode. Don't allow any mode in the PR.
10555 We cannot hold DCmode values in the XD registers because alter_reg
10556 handles subregs of them incorrectly. We could work around this by
10557 spacing the XD registers like the DR registers, but this would require
10558 additional memory in every compilation to hold larger register vectors.
10559 We could hold SFmode / SCmode values in XD registers, but that
10560 would require a tertiary reload when reloading from / to memory,
10561 and a secondary reload to reload from / to general regs; that
10562 seems to be a losing proposition.
10564 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10565 it won't be ferried through GP registers first. */
10566 static bool
10567 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10569 if (SPECIAL_REGISTER_P (regno))
10570 return mode == SImode;
10572 if (regno == FPUL_REG)
10573 return (mode == SImode || mode == SFmode);
10575 if (FP_REGISTER_P (regno) && mode == SFmode)
10576 return true;
10578 if (mode == V2SFmode)
10580 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10581 || GENERAL_REGISTER_P (regno)))
10582 return true;
10583 else
10584 return false;
10587 if (mode == V4SFmode)
10589 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10590 || GENERAL_REGISTER_P (regno))
10591 return true;
10592 else
10593 return false;
10596 if (mode == V16SFmode)
10597 return regno == FIRST_XD_REG;
10599 if (FP_REGISTER_P (regno))
10601 if (mode == SFmode
10602 || mode == SImode
10603 || ((TARGET_SH2E) && mode == SCmode)
10604 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10605 && ((regno - FIRST_FP_REG) & 1) == 0)
10606 || (TARGET_SH4 && mode == TImode
10607 && ((regno - FIRST_FP_REG) & 3) == 0))
10608 return true;
10609 else
10610 return false;
10613 if (XD_REGISTER_P (regno))
10614 return mode == DFmode;
10616 if (regno == PR_REG)
10617 return mode == SImode;
10619 if (regno == FPSCR_REG)
10620 return mode == SImode;
10622 return true;
10625 /* Implement TARGET_MODES_TIEABLE_P.
10627 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10628 and MODE2, for any hard reg, then this must be false for correct output.
10629 That's the case for xd registers: we don't hold SFmode values in
10630 them, so we can't tie an SFmode pseudos with one in another
10631 floating-point mode. */
10633 static bool
10634 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10636 return (mode1 == mode2
10637 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10638 && (mode1 != SFmode && mode2 != SFmode)));
10641 /* Specify the modes required to caller save a given hard regno.
10642 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10643 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10644 permits integer modes on them. That makes LRA's split process
10645 unhappy. See PR55212.
10647 machine_mode
10648 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10649 machine_mode mode)
10651 if (FP_REGISTER_P (regno)
10652 && (mode == SFmode
10653 || mode == SCmode
10654 || ((mode == DFmode || mode == DCmode)
10655 && ((regno - FIRST_FP_REG) & 1) == 0)))
10656 return mode;
10658 return choose_hard_reg_mode (regno, nregs, false);
10661 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10662 static bool
10663 sh_can_change_mode_class (machine_mode from, machine_mode to,
10664 reg_class_t rclass)
10666 /* We want to enable the use of SUBREGs as a means to
10667 VEC_SELECT a single element of a vector. */
10669 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10670 This can be problematic when SFmode vector subregs need to be accessed
10671 on the stack with displacement addressing, as it happens with -O0.
10672 Thus we disallow the mode change for -O0. */
10673 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10674 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10676 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10678 if (TARGET_LITTLE_ENDIAN)
10680 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10681 return !reg_classes_intersect_p (DF_REGS, rclass);
10683 else
10685 if (GET_MODE_SIZE (from) < 8)
10686 return !reg_classes_intersect_p (DF_REGS, rclass);
10689 return true;
10692 /* Return true if registers in machine mode MODE will likely be
10693 allocated to registers in small register classes. */
10694 bool
10695 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10697 return true;
10700 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10701 that label is used. */
10702 void
10703 sh_mark_label (rtx address, int nuses)
10705 if (GOTOFF_P (address))
10707 /* Extract the label or symbol. */
10708 address = XEXP (address, 0);
10709 if (GET_CODE (address) == PLUS)
10710 address = XEXP (address, 0);
10711 address = XVECEXP (address, 0, 0);
10713 if (GET_CODE (address) == LABEL_REF
10714 && LABEL_P (XEXP (address, 0)))
10715 LABEL_NUSES (XEXP (address, 0)) += nuses;
10718 /* Compute extra cost of moving data between one register class
10719 and another.
10721 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10722 uses this information. Hence, the general register <-> floating point
10723 register information here is not used for SFmode. */
10724 static int
10725 sh_register_move_cost (machine_mode mode,
10726 reg_class_t srcclass, reg_class_t dstclass)
10728 if (dstclass == T_REGS || dstclass == PR_REGS)
10729 return 10;
10731 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10732 return 4;
10734 if (mode == SImode && TARGET_FMOVD
10735 && REGCLASS_HAS_FP_REG (srcclass)
10736 && REGCLASS_HAS_FP_REG (dstclass))
10737 return 4;
10739 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10740 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10742 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10743 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10744 return 9;
10746 if ((REGCLASS_HAS_FP_REG (dstclass)
10747 && REGCLASS_HAS_GENERAL_REG (srcclass))
10748 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10749 && REGCLASS_HAS_FP_REG (srcclass)))
10751 /* Discourage trying to use fp regs for a pointer. This also
10752 discourages fp regs with SImode because Pmode is an alias
10753 of SImode on this target. See PR target/48596. */
10754 int addend = (mode == Pmode) ? 40 : 0;
10756 return ((TARGET_FMOVD ? 8 : 12) + addend)
10757 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10760 if ((dstclass == FPUL_REGS
10761 && REGCLASS_HAS_GENERAL_REG (srcclass))
10762 || (srcclass == FPUL_REGS
10763 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10764 return 5;
10766 if ((dstclass == FPUL_REGS
10767 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10768 || (srcclass == FPUL_REGS
10769 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10770 return 7;
10772 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10773 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10774 return 4;
10776 if (TARGET_FMOVD
10777 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10778 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10779 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10781 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10784 static rtx
10785 emit_load_ptr (rtx reg, rtx addr)
10787 rtx mem = gen_const_mem (ptr_mode, addr);
10789 if (Pmode != ptr_mode)
10790 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10791 return emit_move_insn (reg, mem);
10794 static void
10795 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10796 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10797 tree function)
10799 CUMULATIVE_ARGS cum;
10800 int structure_value_byref = 0;
10801 rtx this_rtx, this_value, sibcall, funexp;
10802 rtx_insn *insns;
10803 tree funtype = TREE_TYPE (function);
10804 int simple_add = CONST_OK_FOR_ADD (delta);
10805 int did_load = 0;
10806 rtx scratch0, scratch1, scratch2;
10808 reload_completed = 1;
10809 epilogue_completed = 1;
10810 crtl->uses_only_leaf_regs = 1;
10812 emit_note (NOTE_INSN_PROLOGUE_END);
10814 /* Find the "this" pointer. We have such a wide range of ABIs for the
10815 SH that it's best to do this completely machine independently.
10816 "this" is passed as first argument, unless a structure return pointer
10817 comes first, in which case "this" comes second. */
10818 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10819 #ifndef PCC_STATIC_STRUCT_RETURN
10820 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10821 structure_value_byref = 1;
10822 #endif /* not PCC_STATIC_STRUCT_RETURN */
10823 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10825 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10827 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
10829 this_rtx
10830 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
10832 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10833 static chain pointer (even if you can't have nested virtual functions
10834 right now, someone might implement them sometime), and the rest of the
10835 registers are used for argument passing, are callee-saved, or reserved. */
10836 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
10837 -ffixed-reg has been used. */
10838 if (! call_used_regs[0] || fixed_regs[0])
10839 error ("r0 needs to be available as a call-clobbered register");
10840 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10843 if (call_used_regs[1] && ! fixed_regs[1])
10844 scratch1 = gen_rtx_REG (ptr_mode, 1);
10845 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10846 pointing where to return struct values. */
10847 if (call_used_regs[3] && ! fixed_regs[3])
10848 scratch2 = gen_rtx_REG (Pmode, 3);
10851 this_value = plus_constant (Pmode, this_rtx, delta);
10852 if (vcall_offset
10853 && (simple_add || scratch0 != scratch1)
10854 && strict_memory_address_p (ptr_mode, this_value))
10856 emit_load_ptr (scratch0, this_value);
10857 did_load = 1;
10860 if (!delta)
10861 ; /* Do nothing. */
10862 else if (simple_add)
10863 emit_move_insn (this_rtx, this_value);
10864 else
10866 emit_move_insn (scratch1, GEN_INT (delta));
10867 emit_insn (gen_add2_insn (this_rtx, scratch1));
10870 if (vcall_offset)
10872 rtx offset_addr;
10874 if (!did_load)
10875 emit_load_ptr (scratch0, this_rtx);
10877 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10878 if (strict_memory_address_p (ptr_mode, offset_addr))
10879 ; /* Do nothing. */
10880 else if (scratch0 != scratch1)
10882 /* scratch0 != scratch1, and we have indexed loads. Get better
10883 schedule by loading the offset into r1 and using an indexed
10884 load - then the load of r1 can issue before the load from
10885 (this_rtx + delta) finishes. */
10886 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10887 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10889 else if (CONST_OK_FOR_ADD (vcall_offset))
10891 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10892 offset_addr = scratch0;
10894 else
10895 gcc_unreachable (); /* FIXME */
10896 emit_load_ptr (scratch0, offset_addr);
10898 if (Pmode != ptr_mode)
10899 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10900 emit_insn (gen_add2_insn (this_rtx, scratch0));
10903 /* Generate a tail call to the target function. */
10904 if (! TREE_USED (function))
10906 assemble_external (function);
10907 TREE_USED (function) = 1;
10909 funexp = XEXP (DECL_RTL (function), 0);
10910 /* If the function is overridden, so is the thunk, hence we don't
10911 need GOT addressing even if this is a public symbol. */
10912 #if 0
10913 if (TARGET_SH1 && ! flag_weak)
10914 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10915 else
10916 #endif
10917 if (TARGET_SH2 && flag_pic)
10919 if (TARGET_FDPIC)
10921 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10922 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10924 else
10926 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10927 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10930 else
10932 emit_move_insn (scratch2, funexp);
10933 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10934 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10936 sibcall = emit_call_insn (sibcall);
10937 SIBLING_CALL_P (sibcall) = 1;
10938 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10939 emit_barrier ();
10941 /* Run just enough of rest_of_compilation to do scheduling and get
10942 the insns emitted. Note that use_thunk calls
10943 assemble_start_function and assemble_end_function. */
10945 insns = get_insns ();
10947 if (optimize > 0)
10949 if (! cfun->cfg)
10950 init_flow (cfun);
10951 split_all_insns_noflow ();
10954 sh_reorg ();
10955 shorten_branches (insns);
10956 final_start_function (insns, file, 1);
10957 final (insns, file, 1);
10958 final_end_function ();
10960 reload_completed = 0;
10961 epilogue_completed = 0;
10964 /* Return an RTX pair for the address and call site label of a function
10965 NAME of kind KIND, placing the result in TARGET if not NULL. For
10966 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10967 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10968 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10969 address of the function itself, not a function descriptor, so they
10970 can only be used with functions not using the FDPIC register that
10971 are known to be called directory without a PLT entry. */
10973 function_symbol_result
10974 function_symbol (rtx target, const char *name, sh_function_kind kind)
10976 /* If this is not an ordinary function, the name usually comes from a
10977 string literal or an sprintf buffer. Make sure we use the same
10978 string consistently, so that cse will be able to unify address loads. */
10979 if (kind != FUNCTION_ORDINARY)
10980 name = IDENTIFIER_POINTER (get_identifier (name));
10981 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
10982 rtx lab = const0_rtx;
10983 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10984 if (flag_pic)
10985 switch (kind)
10987 case FUNCTION_ORDINARY:
10988 break;
10989 case SFUNC_GOT:
10991 rtx reg = target ? target : gen_reg_rtx (Pmode);
10993 emit_insn (gen_symGOT2reg (reg, sym));
10994 sym = reg;
10995 break;
10997 case SFUNC_STATIC:
10999 rtx reg = target ? target : gen_reg_rtx (Pmode);
11001 if (TARGET_FDPIC)
11003 /* We use PC-relative calls, since GOTOFF can only refer
11004 to writable data. This works along with sh_sfunc_call. */
11005 lab = PATTERN (gen_call_site ());
11006 emit_insn (gen_sym_label2reg (reg, sym, lab));
11008 else
11010 /* ??? To allow cse to work, we use GOTOFF relocations.
11011 we could add combiner patterns to transform this into
11012 straight pc-relative calls with sym2PIC / bsrf when
11013 label load and function call are still 1:1 and in the
11014 same basic block during combine. */
11015 emit_insn (gen_symGOTOFF2reg (reg, sym));
11018 sym = reg;
11019 break;
11022 if (target && sym != target)
11024 emit_move_insn (target, sym);
11025 return function_symbol_result (target, lab);
11027 return function_symbol_result (sym, lab);
11030 /* Find the number of the first general purpose register in S that
11031 is not set. */
11032 static int
11033 scavenge_reg (HARD_REG_SET *s)
11035 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11036 if (TEST_HARD_REG_BIT (*s, r))
11037 return r;
11038 return -1;
11042 sh_get_pr_initial_val (void)
11044 /* If we haven't finished rtl generation, there might be a nonlocal label
11045 that we haven't seen yet.
11046 ??? get_hard_reg_initial_val fails if it is called after register
11047 allocation has started, unless it has been called before for the
11048 same register. And even then, we end in trouble if we didn't use
11049 the register in the same basic block before. So call
11050 get_hard_reg_initial_val now and wrap it in an unspec if we might
11051 need to replace it. */
11052 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11053 combine can put the pseudo returned by get_hard_reg_initial_val into
11054 instructions that need a general purpose registers, which will fail to
11055 be recognized when the pseudo becomes allocated to PR. */
11056 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
11057 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11060 bool
11061 sh_expand_t_scc (rtx operands[])
11063 enum rtx_code code = GET_CODE (operands[1]);
11064 rtx target = operands[0];
11065 rtx op0 = operands[2];
11066 rtx op1 = operands[3];
11067 rtx result = target;
11069 if (!REG_P (op0) || REGNO (op0) != T_REG
11070 || !CONST_INT_P (op1))
11071 return false;
11072 if (!REG_P (result))
11073 result = gen_reg_rtx (SImode);
11074 HOST_WIDE_INT val = INTVAL (op1);
11075 if ((code == EQ && val == 1) || (code == NE && val == 0))
11076 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11077 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11078 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11079 else if (code == EQ || code == NE)
11080 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11081 else
11082 return false;
11083 if (result != target)
11084 emit_move_insn (target, result);
11085 return true;
11088 /* INSN is an sfunc; return the rtx that describes the address used. */
11089 static rtx
11090 extract_sfunc_addr (rtx insn)
11092 rtx pattern = PATTERN (insn);
11093 const int len = XVECLEN (pattern, 0);
11094 for (int i = 0; i < len; i++)
11096 rtx part = XVECEXP (pattern, 0, i);
11097 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11098 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11099 return XEXP (part, 0);
11101 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11102 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11105 /* Verify that the register in use_sfunc_addr still agrees with the address
11106 used in the sfunc. This prevents fill_slots_from_thread from changing
11107 use_sfunc_addr.
11108 INSN is the use_sfunc_addr instruction, and REG is the register it
11109 guards. */
11110 bool
11111 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11113 /* Search for the sfunc. It should really come right after INSN. */
11114 while ((insn = NEXT_INSN (insn)))
11116 if (LABEL_P (insn) || JUMP_P (insn))
11117 break;
11118 if (! INSN_P (insn))
11119 continue;
11121 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11122 insn = seq->insn (0);
11123 if (GET_CODE (PATTERN (insn)) != PARALLEL
11124 || get_attr_type (insn) != TYPE_SFUNC)
11125 continue;
11126 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11128 gcc_unreachable ();
11131 /* This function returns a constant rtx that represents 2**15 / pi in
11132 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11133 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
11134 static GTY(()) rtx sh_fsca_sf2int_rtx;
11137 sh_fsca_sf2int (void)
11139 if (! sh_fsca_sf2int_rtx)
11141 REAL_VALUE_TYPE rv;
11143 real_from_string (&rv, "10430.378350470453");
11144 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11147 return sh_fsca_sf2int_rtx;
11150 /* This function returns a constant rtx that represents pi / 2**15 in
11151 SFmode. It's used to scale SFmode angles, in radians, to a
11152 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
11153 maps to 0x10000. */
11154 static GTY(()) rtx sh_fsca_int2sf_rtx;
11157 sh_fsca_int2sf (void)
11159 if (! sh_fsca_int2sf_rtx)
11161 REAL_VALUE_TYPE rv;
11163 real_from_string (&rv, "9.587379924285257e-5");
11164 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11167 return sh_fsca_int2sf_rtx;
11170 /* Initialize the CUMULATIVE_ARGS structure. */
11171 void
11172 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11173 tree fntype,
11174 rtx libname ATTRIBUTE_UNUSED,
11175 tree fndecl,
11176 signed int n_named_args,
11177 machine_mode mode)
11179 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11180 pcum->free_single_fp_reg = 0;
11181 pcum->outgoing = n_named_args != -1;
11183 /* FIXME: Should we check TARGET_HITACHI here ??? */
11184 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11186 if (fntype)
11188 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11189 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11190 pcum->prototype_p = prototype_p (fntype);
11191 pcum->arg_count [(int) SH_ARG_INT] = false;
11193 else
11195 pcum->arg_count [(int) SH_ARG_INT] = 0;
11196 pcum->prototype_p = false;
11197 if (mode != VOIDmode)
11199 /* If the default ABI is the Renesas ABI then all library
11200 calls must assume that the library will be using the
11201 Renesas ABI. So if the function would return its result
11202 in memory then we must force the address of this memory
11203 block onto the stack. Ideally we would like to call
11204 targetm.calls.return_in_memory() here but we do not have
11205 the TYPE or the FNDECL available so we synthesize the
11206 contents of that function as best we can. */
11207 pcum->force_mem =
11208 (TARGET_DEFAULT & MASK_HITACHI)
11209 && (mode == BLKmode
11210 || (GET_MODE_SIZE (mode) > 4
11211 && !(mode == DFmode
11212 && TARGET_FPU_DOUBLE)));
11214 else
11215 pcum->force_mem = false;
11220 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11222 enum rtx_code code = TRUNCATE;
11224 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11226 rtx inner = XEXP (x, 0);
11227 machine_mode inner_mode = GET_MODE (inner);
11229 if (inner_mode == mode)
11230 return inner;
11231 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11232 x = inner;
11233 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11234 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11236 code = GET_CODE (x);
11237 x = inner;
11240 return gen_rtx_fmt_e (code, mode, x);
11243 /* Load and store depend on the highpart of the address. However,
11244 set_attr_alternative does not give well-defined results before reload,
11245 so we must look at the rtl ourselves to see if any of the feeding
11246 registers is used in a memref.
11248 Return true iff INSN contains a MEM. */
11249 bool
11250 sh_contains_memref_p (rtx insn)
11252 subrtx_iterator::array_type array;
11253 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11254 if (MEM_P (*iter))
11255 return true;
11256 return false;
11259 /* Return true iff INSN loads a banked register. */
11260 bool
11261 sh_loads_bankedreg_p (rtx insn)
11263 if (GET_CODE (PATTERN (insn)) == SET)
11265 rtx op = SET_DEST (PATTERN(insn));
11266 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11267 return true;
11270 return false;
11273 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11274 static reg_class_t
11275 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11277 return rclass;
11280 /* Implement TARGET_SECONDARY_RELOAD. */
11281 static reg_class_t
11282 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11283 machine_mode mode, secondary_reload_info *sri)
11285 enum reg_class rclass = (enum reg_class) rclass_i;
11287 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11288 && REG_P (XEXP (XEXP (x, 0), 0))
11289 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11290 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11292 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11293 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11295 if (REG_P (x) && REGNO (x) == GBR_REG)
11296 return NO_REGS;
11298 if (in_p)
11300 if (REGCLASS_HAS_FP_REG (rclass)
11301 && immediate_operand ((x), mode)
11302 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11303 switch (mode)
11305 case E_SFmode:
11306 sri->icode = CODE_FOR_reload_insf__frn;
11307 return NO_REGS;
11308 case E_DFmode:
11309 sri->icode = CODE_FOR_reload_indf__frn;
11310 return NO_REGS;
11311 case E_SImode:
11312 /* ??? If we knew that we are in the appropriate mode -
11313 single precision - we could use a reload pattern directly. */
11314 return FPUL_REGS;
11315 default:
11316 abort ();
11318 if (rclass == FPUL_REGS
11319 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11320 || REGNO (x) == T_REG))
11321 || GET_CODE (x) == PLUS))
11322 return GENERAL_REGS;
11323 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11325 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11326 return GENERAL_REGS;
11327 else if (mode == SFmode)
11328 return FP_REGS;
11329 sri->icode = CODE_FOR_reload_insi__i_fpul;
11330 return NO_REGS;
11332 if (rclass == FPSCR_REGS
11333 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11334 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11335 return GENERAL_REGS;
11336 } /* end of input-only processing. */
11338 if (((REGCLASS_HAS_FP_REG (rclass)
11339 && (REG_P (x)
11340 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11341 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11342 && TARGET_FMOVD))))
11343 || (REGCLASS_HAS_GENERAL_REG (rclass)
11344 && REG_P (x)
11345 && FP_REGISTER_P (REGNO (x))))
11346 && (mode == SFmode || mode == SImode))
11347 return FPUL_REGS;
11348 if ((rclass == FPUL_REGS
11349 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11350 && (MEM_P (x)
11351 || (REG_P (x)
11352 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11353 || REGNO (x) == T_REG
11354 || system_reg_operand (x, VOIDmode)))))
11356 if (rclass == FPUL_REGS)
11357 return GENERAL_REGS;
11358 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
11361 if ((rclass == MAC_REGS || rclass == PR_REGS)
11362 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11363 && rclass != REGNO_REG_CLASS (REGNO (x)))
11364 return GENERAL_REGS;
11366 /* If here fall back to loading FPUL register through general registers.
11367 This case can happen when movsi_ie insn is picked initially to
11368 load/store the FPUL register from/to another register, and then the
11369 other register is allocated on the stack. */
11370 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11371 return GENERAL_REGS;
11373 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11374 the other operand.
11375 On SH2A could also just leave it alone here, which would result in a
11376 4 byte move insn being generated instead. However, for this to work
11377 the insns must have the appropriate alternatives. */
11378 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11379 && satisfies_constraint_Sdd (x)
11380 && sh_disp_addr_displacement (x)
11381 <= sh_max_mov_insn_displacement (mode, false))
11382 return R0_REGS;
11384 /* When reload is trying to address a QImode or HImode subreg on the stack,
11385 force any subreg byte into R0_REGS, as this is going to become a
11386 displacement address.
11387 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11388 is on the stack, the memref to it might already require a displacement
11389 and that has to be added to the final address. At this point we don't
11390 know the cumulative displacement so we assume the worst case. */
11391 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11392 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11393 return R0_REGS;
11395 return NO_REGS;
11398 /* Return true if SUBST can't safely replace its equivalent during RA. */
11399 static bool
11400 sh_cannot_substitute_mem_equiv_p (rtx)
11402 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11403 uses R0 and may cause spill failure when R0 is already used.
11404 We have to return true for that case at least.
11405 Moreover SH has strong R0 parity and also have not enough numbers of
11406 the hard registers to make the equiv substitution win in the size
11407 and the speed on average working sets. The pseudos produced to
11408 hold the equiv values can't get good hard registers for bad cases
11409 and end up memory save/restore insns which make the code worse. */
11410 return true;
11413 /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
11414 static bool
11415 sh_legitimize_address_displacement (rtx *offset1, rtx *offset2,
11416 poly_int64 orig_offset,
11417 machine_mode mode)
11419 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11420 || (TARGET_SH2E && mode == SFmode))
11421 return false;
11423 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset);
11424 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11426 *offset1 = adj.offset_adjust;
11427 *offset2 = adj.mov_disp;
11428 return true;
11431 return false;
11434 /* Return true if movsf insn should be splited with an additional
11435 register. */
11436 bool
11437 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11439 /* op0 == op1 */
11440 if (rtx_equal_p (op0, op1))
11441 return true;
11442 /* fy, FQ, reg */
11443 if (GET_CODE (op1) == CONST_DOUBLE
11444 && ! satisfies_constraint_G (op1)
11445 && ! satisfies_constraint_H (op1)
11446 && REG_P (op0)
11447 && REG_P (op2))
11448 return true;
11449 /* f, r, y */
11450 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11451 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11452 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11453 return true;
11454 /* r, f, y */
11455 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11456 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11457 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11458 return true;
11460 return false;
11463 static void
11464 sh_conditional_register_usage (void)
11466 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11467 if (! VALID_REGISTER_P (regno))
11468 fixed_regs[regno] = call_used_regs[regno] = 1;
11469 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11470 if (flag_pic)
11472 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11473 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11475 if (TARGET_FDPIC)
11477 fixed_regs[PIC_REG] = 1;
11478 call_used_regs[PIC_REG] = 1;
11479 call_really_used_regs[PIC_REG] = 1;
11481 /* Renesas saves and restores mac registers on call. */
11482 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11484 call_really_used_regs[MACH_REG] = 0;
11485 call_really_used_regs[MACL_REG] = 0;
11488 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11489 if (! fixed_regs[regno] && call_really_used_regs[regno])
11490 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11492 call_really_used_regs[FPSCR_MODES_REG] = 0;
11493 call_really_used_regs[FPSCR_STAT_REG] = 0;
11496 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11498 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11499 static bool
11500 sh_legitimate_constant_p (machine_mode mode, rtx x)
11502 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11504 rtx base, offset;
11505 split_const (x, &base, &offset);
11507 if (GET_CODE (base) == SYMBOL_REF
11508 && !offset_within_block_p (base, INTVAL (offset)))
11509 return false;
11512 if (TARGET_FDPIC
11513 && (SYMBOLIC_CONST_P (x)
11514 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11515 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11516 return false;
11518 return GET_CODE (x) != CONST_DOUBLE
11519 || mode == DFmode || mode == SFmode
11520 || mode == DImode || GET_MODE (x) == VOIDmode;
11523 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11525 static void
11526 sh_init_sync_libfuncs (void)
11528 init_sync_libfuncs (UNITS_PER_WORD);
11531 /* Return true if it is appropriate to emit `ret' instructions in the
11532 body of a function. */
11533 bool
11534 sh_can_use_simple_return_p (void)
11536 if (! reload_completed || frame_pointer_needed)
11537 return false;
11539 /* Moving prologue around does't reduce the size. */
11540 if (optimize_function_for_size_p (cfun))
11541 return false;
11543 /* Finally, allow for pr save. */
11544 HARD_REG_SET live_regs_mask;
11545 int d = calc_live_regs (&live_regs_mask);
11547 if (rounded_frame_size (d) > 4)
11548 return false;
11550 return true;
/*------------------------------------------------------------------------------
  Address mode optimization support code
*/
11557 typedef HOST_WIDE_INT disp_t;
11558 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11559 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11560 static const disp_t INVALID_DISP = MAX_DISP;
11562 /* A memory reference which is described by a base register and a
11563 displacement. */
11564 class base_reg_disp
11566 public:
11567 base_reg_disp (rtx br, disp_t d);
11569 bool is_reg (void) const;
11570 bool is_disp (void) const;
11571 rtx reg (void) const;
11572 disp_t disp (void) const;
11574 private:
11575 rtx reg_;
11576 disp_t disp_;
11579 inline
11580 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11581 : reg_ (br), disp_ (d)
11585 inline bool
11586 base_reg_disp::is_reg (void) const
11588 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11591 inline bool
11592 base_reg_disp::is_disp (void) const
11594 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11597 inline rtx
11598 base_reg_disp::reg (void) const
11600 return reg_;
11603 inline disp_t
11604 base_reg_disp::disp (void) const
11606 return disp_;
11609 /* Find the base register and calculate the displacement for a given
11610 address rtx 'x'. */
11611 static base_reg_disp
11612 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11613 rtx base_reg = NULL)
11615 if (REG_P (x))
11617 if (REGNO (x) == GBR_REG)
11618 return base_reg_disp (x, disp);
11620 /* We've reached a hard-reg. This is probably the point where
11621 function args are copied to pseudos. Do not go any further and
11622 stick to the pseudo. If the original mem addr was in a hard reg
11623 from the beginning, it will become the base reg. */
11624 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11625 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11627 /* Find the def of the reg and trace it. If there are more than one
11628 defs and they are not the same, assume it's not safe to proceed. */
11629 rtx_insn* last_i = NULL;
11630 rtx last_set = NULL;
11631 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11632 d = DF_REF_NEXT_REG (d))
11634 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11636 /* Accept multiple defs, as long as they are equal. */
11637 if (last_set == NULL || rtx_equal_p (last_set, set))
11639 last_i = DF_REF_INSN (d);
11640 last_set = set;
11642 else
11644 last_i = NULL;
11645 last_set = NULL;
11646 break;
11650 if (last_set != NULL && last_i != NULL)
11651 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11652 XEXP (last_set, 0));
11654 /* When here, no previous insn was found that sets the reg.
11655 The input reg is already the base reg. */
11656 return base_reg_disp (x, disp);
11659 else if (GET_CODE (x) == PLUS)
11661 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11662 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11664 /* Either left or right val must be a reg.
11665 We don't handle the case of 'reg + reg' here. */
11666 if (left_val.is_reg () && right_val.is_disp ())
11667 return base_reg_disp (left_val.reg (), left_val.disp ()
11668 + right_val.disp () + disp);
11669 else if (right_val.is_reg () && left_val.is_disp ())
11670 return base_reg_disp (right_val.reg (), right_val.disp ()
11671 + left_val.disp () + disp);
11672 else
11673 return base_reg_disp (base_reg, disp);
11676 else if (CONST_INT_P (x))
11677 return base_reg_disp (NULL, disp + INTVAL (x));
11679 /* Didn't find anything useful. */
11680 return base_reg_disp (base_reg, disp);
11683 /* Given an insn and a memory operand, try to find an equivalent GBR
11684 based memory address and return the corresponding new memory address.
11685 Return NULL_RTX if not found. */
11687 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11689 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11690 return NULL_RTX;
11692 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11693 if (side_effects_p (XEXP (mem, 0)))
11694 return NULL_RTX;
11696 /* When not optimizing there might be no dataflow available. */
11697 if (df == NULL)
11698 return NULL_RTX;
11700 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11702 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11704 /* If GBR is marked as call clobbered we bail out if we see a call.
11705 FIXME: Actually should check if this mem refers to the gbr value
11706 before or after the call. If there is a store_gbr preceeding this
11707 mem, it's safe to use GBR for this mem.
11709 If GBR is not marked as call clobbered, but there is some other
11710 def than a call, it's probably a load_gbr upon which we also
11711 bail out to be on the safe side.
11712 FIXME: Should check if we have a use-after-def case, such as
11713 the call case above. */
11714 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11715 d = DF_REF_NEXT_REG (d))
11717 if (CALL_P (DF_REF_INSN (d)))
11719 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
11720 return NULL_RTX;
11721 else
11722 continue;
11724 else
11725 return NULL_RTX;
11728 rtx disp = GEN_INT (gbr_disp.disp ());
11729 if (gbr_displacement (disp, GET_MODE (mem)))
11730 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11733 return NULL_RTX;
/*------------------------------------------------------------------------------
  Manual insn combine support code.
*/
11740 /* Return true if the specified insn contains any UNSPECs or
11741 UNSPEC_VOLATILEs. */
11742 static bool
11743 sh_unspec_insn_p (rtx x)
11745 subrtx_iterator::array_type array;
11746 FOR_EACH_SUBRTX (i, array, x, ALL)
11747 if (*i != NULL
11748 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11749 return true;
11751 return false;
11754 /* Return true if the register operands of the specified insn are modified
11755 between the specified from and to insns (exclusive of those two). */
11756 bool
11757 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11758 const rtx_insn* from,
11759 const rtx_insn* to)
11761 /* FIXME: Return true for multiple sets for now. */
11762 rtx s = single_set (operands_insn);
11763 if (s == NULL_RTX)
11764 return true;
11766 subrtx_iterator::array_type array;
11767 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11768 if (*i != NULL &&
11769 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11770 return true;
11772 return false;
11775 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11776 negates the T bit and stores the result in the T bit. */
11777 bool
11778 sh_is_nott_insn (const rtx_insn* i)
11780 return i != NULL && GET_CODE (PATTERN (i)) == SET
11781 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11782 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11786 sh_movt_set_dest (const rtx_insn* i)
11788 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11792 sh_movt_set_dest (const_rtx pat)
11794 return GET_CODE (pat) == SET
11795 && arith_reg_dest (XEXP (pat, 0), SImode)
11796 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11799 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11800 that stores the negated T bit in a register, and return the destination
11801 register rtx, or null. */
11803 sh_movrt_set_dest (const rtx_insn* i)
11805 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11809 sh_movrt_set_dest (const_rtx pat)
11811 /* The negc movrt replacement is inside a parallel. */
11812 if (GET_CODE (pat) == PARALLEL)
11813 pat = XVECEXP (pat, 0, 0);
11815 return GET_CODE (pat) == SET
11816 && arith_reg_dest (XEXP (pat, 0), SImode)
11817 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11821 /* Given an insn and a reg number, tell whether the reg dies or is unused
11822 after the insn. */
11823 bool
11824 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11826 return find_regno_note (i, REG_DEAD, regno) != NULL
11827 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11830 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11831 mark it as being used after the insn. */
11832 void
11833 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11835 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11836 remove_note (i, n);
11837 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11838 remove_note (i, n);
11841 /* Given an insn check if it contains any post/pre inc/dec mem operands and
11842 add the REG_INC notes accordingly.
11843 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
11844 FIXME: This function is currently used by peephole2 patterns because
11845 the peephole2 pass does not preserve REG_INC notes. If the notes
11846 are dropped the following passes will do wrong things. */
11847 rtx_insn*
11848 sh_check_add_incdec_notes (rtx_insn* i)
11850 struct for_each_inc_dec_clb
11852 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11853 rtx dest, rtx src ATTRIBUTE_UNUSED,
11854 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11856 gcc_assert (REG_P (dest));
11858 rtx_insn* i = (rtx_insn*)arg;
11859 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11860 add_reg_note (i, REG_INC, dest);
11862 return 0;
11866 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11867 return i;
11870 /* Given a move insn destiation and a source, make sure that the move source
11871 operand is not a post-inc mem load with the same address reg as the
11872 destination. Returns the modified source operand with the post-inc removed
11873 if necessary. */
11875 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11877 if (!MEM_P (src))
11878 return src;
11880 rtx addr = XEXP (src, 0);
11882 if (GET_CODE (addr) == POST_INC
11883 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11884 return replace_equiv_address (src, XEXP (addr, 0));
11886 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11887 return src;
11890 /* Emit a move insn that is safe to be used in peephole patterns. */
11891 rtx_insn*
11892 sh_peephole_emit_move_insn (rtx dst, rtx src)
11894 return sh_check_add_incdec_notes (
11895 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11898 /* Given an op rtx and an insn, try to find out whether the result of the
11899 specified op consists only of logical operations on T bit stores. */
11900 bool
11901 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11903 if (!logical_operator (op, SImode))
11904 return false;
11906 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11907 int op_is_t_count = 0;
11909 for (int i = 0; i < 2; ++i)
11911 if (t_reg_operand (ops[i], VOIDmode)
11912 || negt_reg_operand (ops[i], VOIDmode))
11913 op_is_t_count++;
11915 else
11917 set_of_reg op_set = sh_find_set_of_reg
11918 (ops[i], insn, prev_nonnote_nondebug_insn_bb);
11919 if (op_set.set_src == NULL_RTX)
11920 continue;
11922 if (t_reg_operand (op_set.set_src, VOIDmode)
11923 || negt_reg_operand (op_set.set_src, VOIDmode)
11924 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11925 op_is_t_count++;
11929 return op_is_t_count == 2;
11932 /* Given the operand that is extended in a sign/zero extend insn, and the
11933 insn, try to figure out whether the sign/zero extension can be replaced
11934 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11935 NULL_RTX otherwise. */
11937 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11939 if (REG_P (extended_op))
11940 extended_op = extended_op;
11941 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11942 extended_op = SUBREG_REG (extended_op);
11943 else
11944 return NULL_RTX;
11946 /* Reg moves must be of the same mode. */
11947 if (GET_MODE (extended_op) != SImode)
11948 return NULL_RTX;
11950 set_of_reg s = sh_find_set_of_reg (extended_op, insn,
11951 prev_nonnote_nondebug_insn_bb);
11952 if (s.set_src == NULL_RTX)
11953 return NULL_RTX;
11955 if (t_reg_operand (s.set_src, VOIDmode)
11956 || negt_reg_operand (s.set_src, VOIDmode))
11957 return extended_op;
11959 /* If the zero extended reg was formed by a logical operation, check the
11960 operands of the logical operation. If both originated from T bit
11961 stores the zero extension can be eliminated. */
11962 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11963 return extended_op;
11965 return NULL_RTX;
11968 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11969 figure out whether it should be converted into a movt-xor sequence in
11970 the movrt_negc splitter.
11971 Returns true if insns have been modified and the splitter has succeeded. */
11972 bool
11973 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11975 /* In cases such as
11976 tst r4,r4
11977 mov #-1,r1
11978 negc r1,r1
11979 tst r4,r4
11980 we can replace the T bit clobbering negc with a movt-xor sequence and
11981 eliminate the redundant comparison.
11982 Because the xor insn depends on register allocation results, allow this
11983 only before reload. */
11984 if (!can_create_pseudo_p ())
11985 return false;
11987 set_of_reg t_before_negc = sh_find_set_of_reg
11988 (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb);
11989 set_of_reg t_after_negc = sh_find_set_of_reg
11990 (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb);
11992 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
11993 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
11994 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11995 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
11996 t_before_negc.insn,
11997 t_after_negc.insn)
11998 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11999 && !sh_unspec_insn_p (t_after_negc.insn)
12000 && !volatile_insn_p (PATTERN (t_after_negc.insn))
12001 && !side_effects_p (PATTERN (t_after_negc.insn))
12002 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
12004 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
12005 set_insn_deleted (t_after_negc.insn);
12006 return true;
12008 else
12009 return false;
12012 /* Given a reg and the current insn, see if the value of the reg originated
12013 from a sign or zero extension and return the discovered information. */
12014 sh_extending_set_of_reg
12015 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
12017 if (reg == NULL)
12018 return sh_extending_set_of_reg (curr_insn);
12020 if (SUBREG_P (reg))
12021 reg = SUBREG_REG (reg);
12023 if (!REG_P (reg))
12024 return sh_extending_set_of_reg (curr_insn);
12026 /* FIXME: Also search the predecessor basic blocks. It seems that checking
12027 only the adjacent predecessor blocks would cover most of the cases.
12028 Also try to look through the first extension that we hit. There are some
12029 cases, where a zero_extend is followed an (implicit) sign_extend, and it
12030 fails to see the sign_extend. */
12031 sh_extending_set_of_reg result = sh_find_set_of_reg
12032 (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true);
12034 if (result.set_src != NULL)
12036 if (GET_CODE (result.set_src) == SIGN_EXTEND
12037 || GET_CODE (result.set_src) == ZERO_EXTEND)
12039 if (dump_file)
12040 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12041 "explicitly sign/zero extended in insn %d\n",
12042 REGNO (reg), INSN_UID (result.insn));
12043 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
12044 result.ext_code = GET_CODE (result.set_src);
12046 else if (MEM_P (result.set_src)
12047 && (GET_MODE (result.set_src) == QImode
12048 || GET_MODE (result.set_src) == HImode)
12049 && !sh_unspec_insn_p (result.insn))
12051 /* On SH QIHImode memory loads always sign extend. However, in
12052 some cases where it seems that the higher bits are not
12053 interesting, the loads will not be expanded as sign extending
12054 insns, but as QIHImode loads into QIHImode regs. We report that
12055 the reg has been sign extended by the mem load. When it is used
12056 as such, we must convert the mem load into a sign extending insn,
12057 see also sh_extending_set_of_reg::use_as_extended_reg. */
12058 if (dump_file)
12059 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12060 "implicitly sign extended in insn %d\n",
12061 REGNO (reg), INSN_UID (result.insn));
12062 result.from_mode = GET_MODE (result.set_src);
12063 result.ext_code = SIGN_EXTEND;
12067 return result;
12070 /* Given a reg that is known to be sign or zero extended at some insn,
12071 take the appropriate measures so that the extended value can be used as
12072 a reg at the specified insn and return the resulting reg rtx. */
12074 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
12076 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
12077 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
12078 gcc_assert (from_mode == QImode || from_mode == HImode);
12080 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
12082 if (dump_file)
12083 fprintf (dump_file,
12084 "use_as_extended_reg: converting non-extending mem load in "
12085 "insn %d into sign-extending load\n", INSN_UID (insn));
12087 rtx r = gen_reg_rtx (SImode);
12088 rtx_insn* i0;
12089 if (from_mode == QImode)
12090 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
12091 else if (from_mode == HImode)
12092 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
12093 else
12094 gcc_unreachable ();
12096 emit_insn_after (
12097 gen_move_insn (XEXP (set_rtx, 0),
12098 gen_lowpart (GET_MODE (set_src), r)), i0);
12099 set_insn_deleted (insn);
12100 return r;
12102 else
12104 rtx extension_dst = XEXP (set_rtx, 0);
12105 if (GET_MODE (extension_dst) != SImode)
12106 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12107 GET_MODE (extension_dst), 0);
12108 if (modified_between_p (extension_dst, insn, use_at_insn))
12110 if (dump_file)
12111 fprintf (dump_file,
12112 "use_as_extended_reg: dest reg %d of extending insn %d is "
12113 "modified, inserting a reg-reg copy\n",
12114 REGNO (extension_dst), INSN_UID (insn));
12116 rtx r = gen_reg_rtx (SImode);
12117 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12118 return r;
12120 else
12122 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12123 return extension_dst;
12128 bool
12129 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12131 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12132 && (from_mode == QImode || from_mode == HImode)
12133 && set_src != NULL)
12134 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12135 else
12136 return false;
12140 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12142 gcc_assert (can_use_as_unextended_reg ());
12144 rtx r = XEXP (set_src, 0);
12145 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12147 if (modified_between_p (r, insn, use_at_insn))
12149 rtx r1 = gen_reg_rtx (SImode);
12150 emit_insn_after (gen_move_insn (r1, r0), insn);
12151 return r1;
12153 else
12155 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12156 ? REGNO (SUBREG_REG (r))
12157 : REGNO (r));
12158 return r0;
12162 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12163 perform the necessary checks on the operands and split it accordingly. */
12164 void
12165 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12166 int subreg_offset, rtx operands[])
12168 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12170 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12171 curr_insn);
12172 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12173 curr_insn);
12175 /* If one of the operands is known to be zero extended, that's already
12176 sufficient to mask out the unwanted high bits. */
12177 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12179 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12180 operands[1]));
12181 return;
12183 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12185 emit_insn (gen_tstsi_t (operands[0],
12186 eop1.use_as_extended_reg (curr_insn)));
12187 return;
12190 /* None of the operands seem to be zero extended.
12191 If both are sign extended it's OK, too. */
12192 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12193 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12195 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12196 eop1.use_as_extended_reg (curr_insn)));
12197 return;
12200 /* Otherwise we have to insert a zero extension on one of the operands to
12201 mask out the unwanted high bits.
12202 Prefer the operand that has no known extension. */
12203 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12204 std::swap (operands[0], operands[1]);
12206 rtx tmp0 = gen_reg_rtx (SImode);
12207 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12208 GET_MODE (operands[0]), subreg_offset);
12209 emit_insn (subreg_mode == QImode
12210 ? gen_zero_extendqisi2 (tmp0, tmp1)
12211 : gen_zero_extendhisi2 (tmp0, tmp1));
12212 emit_insn (gen_tstsi_t (tmp0, operands[1]));
12215 /* A helper class to increment/decrement a counter variable each time a
12216 function is entered/left. */
12217 class scope_counter
12219 public:
12220 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12222 ~scope_counter (void)
12224 --m_counter;
12225 gcc_assert (m_counter >= 0);
12228 int count (void) const { return m_counter; }
12230 private:
12231 int& m_counter;
12234 /* Given an rtx x, determine whether the expression can be used to create
12235 an insn that calulates x and stores the result in the T bit.
12236 This is used by the 'treg_set_expr' predicate to construct insns sequences
12237 where T bit results are fed into other insns, such as addc, subc, negc
12238 insns.
12240 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12241 distinguish between 'positive' and 'negative' forms. For now this has to
12242 be done in the preparation code. We could also introduce
12243 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12244 two different patterns for the 'postive' and 'negative' forms. However,
12245 the total amount of lines of code seems to be about the same and the
12246 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12247 recog function would need to look inside the expression by temporarily
12248 splitting it. */
12249 static int sh_recog_treg_set_expr_reent_count = 0;
12251 bool
12252 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12254 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12256 /* Limit the recursion count to avoid nested expressions which we can't
12257 resolve to a single treg set insn. */
12258 if (recursion.count () > 1)
12259 return false;
12261 /* Early accept known possible operands before doing recog. */
12262 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12263 || negt_reg_operand (op, mode))
12264 return true;
12266 /* Early reject impossible operands before doing recog.
12267 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12268 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12269 such as lower-subreg will bail out. Some insns such as SH4A movua are
12270 done with UNSPEC, so must reject those, too, or else it would result
12271 in an invalid reg -> treg move. */
12272 if (CONST_INT_P (op) || register_operand (op, mode)
12273 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12274 return false;
12276 if (!can_create_pseudo_p ())
12277 return false;
12279 /* expand_debug_locations may call this to compute rtx costs at
12280 very early stage. In that case, don't make new insns here to
12281 avoid codegen differences with -g. */
12282 if (currently_expanding_to_rtl)
12283 return false;
12285 /* We are going to invoke recog in a re-entrant way and thus
12286 have to capture its current state and restore it afterwards. */
12287 recog_data_d prev_recog_data = recog_data;
12289 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12290 SET_PREV_INSN (i) = NULL;
12291 SET_NEXT_INSN (i) = NULL;
12293 /* If the comparison op doesn't have a result mode, set it to SImode. */
12294 machine_mode prev_op_mode = GET_MODE (op);
12295 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12296 PUT_MODE (op, SImode);
12298 int result = recog (PATTERN (i), i, 0);
12300 /* It seems there is no insn like that. Create a negated version and
12301 try again. If we hit a negated form, we'll allow that and append a
12302 nott sequence when splitting out the insns. Insns that do the split
12303 can then remove the trailing nott if they know how to deal with it. */
12304 if (result < 0 && COMPARISON_P (op))
12306 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12307 if (cmp_mode == VOIDmode)
12308 cmp_mode = GET_MODE (XEXP (op, 1));
12310 rtx_code prev_code = GET_CODE (op);
12311 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12312 result = recog (PATTERN (i), i, 0);
12313 PUT_CODE (op, prev_code);
12316 PUT_MODE (op, prev_op_mode);
12317 recog_data = prev_recog_data;
12318 return result >= 0;
12321 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12322 This can be used as a condition for insn/split patterns to allow certain
12323 T bit setting patters only to be matched as sub expressions of other
12324 patterns. */
12325 bool
12326 sh_in_recog_treg_set_expr (void)
12328 return sh_recog_treg_set_expr_reent_count > 0;
12331 /* Given an rtx x, which is assumed to be some expression that has been
12332 matched by the 'treg_set_expr' predicate before, split and emit the
12333 insns that are necessary to calculate the expression and store the result
12334 in the T bit.
12335 The splitting is done recursively similar to 'try_split' in emit-rt.c.
12336 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12337 'delete_insn' which then causes the DF parts to bail out, because we
12338 currently are inside another gen_split* function and would invoke
12339 'try_split' in a reentrant way. */
12340 static std::pair<rtx_insn*, rtx_insn*>
12341 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12343 if (dump_file)
12345 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12346 print_rtl_single (dump_file, i);
12347 fprintf (dump_file, "\n");
12350 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12352 if (seq == NULL)
12353 return std::make_pair (i, i);
12355 /* Avoid infinite splitter loops if any insn of the result matches
12356 the original pattern. */
12357 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12358 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12359 return std::make_pair (i, i);
12361 unshare_all_rtl_in_chain (seq);
12363 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12364 a linked list, replace the single insn with the new insns. */
12365 rtx_insn* seqlast = seq;
12366 while (NEXT_INSN (seqlast) != NULL)
12367 seqlast = NEXT_INSN (seqlast);
12369 if (rtx_insn* iprev = PREV_INSN (i))
12370 SET_NEXT_INSN (iprev) = seq;
12371 if (rtx_insn* inext = NEXT_INSN (i))
12372 SET_PREV_INSN (inext) = seqlast;
12374 SET_PREV_INSN (seq) = PREV_INSN (i);
12375 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12377 SET_PREV_INSN (i) = NULL;
12378 SET_NEXT_INSN (i) = NULL;
12380 /* Recursively split all insns. */
12381 for (i = seq; ; i = NEXT_INSN (i))
12383 std::pair<rtx_insn*, rtx_insn*> ii =
12384 sh_try_split_insn_simple (i, curr_insn, n + 1);
12385 if (i == seq)
12386 seq = ii.first;
12387 if (i == seqlast)
12389 seqlast = ii.second;
12390 break;
12392 i = ii.first;
12395 return std::make_pair (seq, seqlast);
12398 sh_treg_insns
12399 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12401 if (t_reg_operand (x, VOIDmode))
12402 return sh_treg_insns ();
12404 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12406 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12407 SET_PREV_INSN (i) = NULL;
12408 SET_NEXT_INSN (i) = NULL;
12410 if (dump_file)
12412 fprintf (dump_file, "split_treg_set_expr insn:\n");
12413 print_rtl (dump_file, i);
12414 fprintf (dump_file, "\n");
12417 /* If the insn is not found, we will try a negated form and append
12418 a nott. */
12419 bool append_nott = false;
12421 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12422 have to capture its current state and restore it afterwards. */
12423 recog_data_d prev_recog_data = recog_data;
12425 if (negt_reg_operand (x, GET_MODE (x)))
12427 /* This is a normal movt followed by a nott. It will be converted
12428 into a movrt after initial expansion. */
12429 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12430 append_nott = true;
12432 else
12434 /* If the comparison op doesn't have a mode set, set it to SImode. */
12435 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12436 PUT_MODE (x, SImode);
12438 int insn_code = recog (PATTERN (i), i, 0);
12440 if (insn_code < 0 && COMPARISON_P (x))
12442 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12443 if (cmp_mode == VOIDmode)
12444 cmp_mode = GET_MODE (XEXP (x, 1));
12446 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12447 insn_code = recog (PATTERN (i), i, 0);
12448 append_nott = true;
12451 gcc_assert (insn_code >= 0);
12454 /* Try to recursively split the insn. Some insns might refuse to split
12455 any further while we are in the treg_set_expr splitting phase. They
12456 will be emitted as part of the outer insn and then split again. */
12457 std::pair<rtx_insn*, rtx_insn*> insnlist =
12458 sh_try_split_insn_simple (i, curr_insn);
12460 /* Restore recog state. */
12461 recog_data = prev_recog_data;
12463 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12464 ? insnlist.second
12465 : NULL;
12466 if (dump_file)
12468 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12469 print_rtl (dump_file, insnlist.first);
12470 fprintf (dump_file, "\n");
12472 if (nott_insn != NULL)
12473 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12476 emit_insn (insnlist.first);
12478 if (nott_insn != NULL && append_nott)
12480 if (dump_file)
12481 fprintf (dump_file, "removing trailing nott\n");
12482 remove_insn (nott_insn);
12483 nott_insn = NULL;
12484 append_nott = false;
12487 if (append_nott)
12488 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12490 rtx_insn* first_insn = get_insns ();
12492 if (dump_file)
12494 fprintf (dump_file, "resulting insns:\n");
12495 print_rtl (dump_file, first_insn);
12496 fprintf (dump_file, "\n");
12499 return sh_treg_insns (first_insn, nott_insn);
/*------------------------------------------------------------------------------
  Mode switching support code.
*/
12506 static void
12507 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12508 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12510 if ((TARGET_SH4A_FP || TARGET_SH4_300)
12511 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12513 emit_insn (gen_toggle_pr ());
12514 if (TARGET_FMOVD)
12515 emit_insn (gen_toggle_sz ());
12517 else if (mode != FP_MODE_NONE)
12519 rtx tmp = gen_reg_rtx (SImode);
12520 emit_insn (gen_sts_fpscr (tmp));
12521 rtx i = NULL;
12523 const unsigned HOST_WIDE_INT fpbits =
12524 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12526 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12527 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12528 else if (mode == FP_MODE_SINGLE)
12529 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12530 else if (mode == FP_MODE_DOUBLE)
12531 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12532 else
12533 gcc_unreachable ();
12535 emit_insn (i);
12536 emit_insn (gen_lds_fpscr (tmp));
12540 static int
12541 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12543 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12546 static int
12547 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12549 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
12550 get_attr_fp_set (insn) != FP_SET_NONE)
12551 return (int) get_attr_fp_set (insn);
12552 else
12553 return mode;
12556 static int
12557 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12559 return NORMAL_MODE (entity);
12562 static int
12563 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12565 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12568 static int
12569 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12571 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
/*------------------------------------------------------------------------------
  Misc
*/
12578 /* Return true if we use LRA instead of reload pass. */
12579 bool
12580 sh_lra_p (void)
12582 return sh_lra_flag;
12585 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12587 static bool
12588 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12589 unsigned int align,
12590 enum by_pieces_operation op,
12591 bool speed_p)
12593 switch (op)
12595 case MOVE_BY_PIECES:
12596 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12597 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12598 case STORE_BY_PIECES:
12599 case SET_BY_PIECES:
12600 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12601 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12602 default:
12603 return default_use_by_pieces_infrastructure_p (size, align,
12604 op, speed_p);
12608 bool
12609 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12610 rtx x ATTRIBUTE_UNUSED)
12612 return TARGET_FDPIC;
12615 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12616 function descriptor) into r1 and the GOT address into r12,
12617 returning an rtx for r1. */
12620 sh_load_function_descriptor (rtx funcdesc)
12622 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12623 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12624 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12625 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12627 emit_move_insn (r1, fnaddr);
12628 /* The ABI requires the entry point address to be loaded first, so
12629 prevent the load from being moved after that of the GOT
12630 address. */
12631 emit_insn (gen_blockage ());
12632 emit_move_insn (pic_reg, gotaddr);
12633 return r1;
12636 /* Return an rtx holding the initial value of the FDPIC register (the
12637 FDPIC pointer passed in from the caller). */
12640 sh_get_fdpic_reg_initial_val (void)
12642 return get_hard_reg_initial_val (Pmode, PIC_REG);
12645 #include "gt-sh.h"