1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2018 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
24 #define IN_TARGET_CODE 1
26 #include "config.h"
27 #define INCLUDE_VECTOR
28 #include "system.h"
29 #include "coretypes.h"
30 #include "backend.h"
31 #include "target.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "optabs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "flags.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "dwarf2.h"
57 #include "langhooks.h"
58 #include "cfgrtl.h"
59 #include "intl.h"
60 #include "sched-int.h"
61 #include "gimplify.h"
62 #include "tm-constrs.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "builtins.h"
67 #include "rtl-iter.h"
68 #include "regs.h"
69 #include "toplev.h"
71 /* This file should be included last. */
72 #include "target-def.h"
74 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
76 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
77 #define GEN_MOV (*(gen_movsi))
78 #define GEN_ADD3 (*(gen_addsi3))
79 #define GEN_SUB3 (*(gen_subsi3))
81 /* Used to simplify the logic below. Find the attributes wherever
82 they may be. */
83 #define SH_ATTRIBUTES(decl) \
84 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
85 : DECL_ATTRIBUTES (decl) \
86 ? (DECL_ATTRIBUTES (decl)) \
87 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
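/* Illustrative use (editorial example, not from the original sources):
   SH_ATTRIBUTES (current_function_decl) yields the function's
   DECL_ATTRIBUTES when it has any, and otherwise falls back to the
   TYPE_ATTRIBUTES of the function's type; applied to a type node it
   simply yields that type's TYPE_ATTRIBUTES.  */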
89 /* Set to true by expand_prologue() when the function is an
90 interrupt handler. */
91 bool current_function_interrupt;
93 tree sh_deferred_function_attributes;
94 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
96 /* Global variables for machine-dependent things. */
98 /* Which cpu are we scheduling for. */
99 enum processor_type sh_cpu;
101 /* Definitions used in ready queue reordering for first scheduling pass. */
103 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
104 static short *regmode_weight[2];
106 /* Total SFmode and SImode weights of scheduled insns. */
107 static int curr_regmode_pressure[2];
109 /* Number of r0 life regions. */
110 static int r0_life_regions;
112 /* If true, skip cycles for Q -> R movement. */
113 static int skip_cycles = 0;
115 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
116 and returned from sh_reorder2. */
117 static short cached_can_issue_more;
119 /* Unique number for UNSPEC_BBR pattern. */
120 static unsigned int unspec_bbr_uid = 1;
122 /* Provides the class number of the smallest class containing
123 each reg number. */
124 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
126 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
159 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
160 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
161 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
162 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
163 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
164 GENERAL_REGS, GENERAL_REGS,
167 char sh_register_names[FIRST_PSEUDO_REGISTER] \
168 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
170 char sh_additional_register_names[ADDREGNAMES_SIZE] \
171 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
172 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
174 int assembler_dialect;
176 static void split_branches (rtx_insn *);
177 static int branch_dest (rtx);
178 static void print_slot (rtx_sequence *);
179 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
180 static void dump_table (rtx_insn *, rtx_insn *);
181 static bool broken_move (rtx_insn *);
182 static bool mova_p (rtx_insn *);
183 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
184 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
185 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
186 static void sh_reorg (void);
187 static void sh_option_override (void);
188 static void sh_override_options_after_change (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
190 static rtx_insn* emit_frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
194 static int calc_live_regs (HARD_REG_SET *);
195 static HOST_WIDE_INT rounded_frame_size (int);
196 static bool sh_frame_pointer_required (void);
197 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
198 static int sh_mode_needed (int, rtx_insn *);
199 static int sh_mode_after (int, int, rtx_insn *);
200 static int sh_mode_entry (int);
201 static int sh_mode_exit (int);
202 static int sh_mode_priority (int entity, int n);
204 static rtx mark_constant_pool_use (rtx);
205 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
206 int, bool *);
207 static tree sh_handle_resbank_handler_attribute (tree *, tree,
208 tree, int, bool *);
209 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
210 tree, int, bool *);
211 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
212 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
213 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
214 static void sh_print_operand (FILE *, rtx, int);
215 static void sh_print_operand_address (FILE *, machine_mode, rtx);
216 static bool sh_print_operand_punct_valid_p (unsigned char code);
217 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
218 static void sh_output_function_epilogue (FILE *);
219 static void sh_insert_attributes (tree, tree *);
220 static const char *sh_check_pch_target_flags (int);
221 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
222 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
223 static int sh_issue_rate (void);
224 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
225 static short find_set_regmode_weight (rtx, machine_mode);
226 static short find_insn_regmode_weight (rtx, machine_mode);
227 static void find_regmode_weight (basic_block, machine_mode);
228 static int find_r0_life_regions (basic_block);
229 static void sh_md_init_global (FILE *, int, int);
230 static void sh_md_finish_global (FILE *, int);
231 static int rank_for_reorder (const void *, const void *);
232 static void swap_reorder (rtx_insn **, int);
233 static void ready_reorder (rtx_insn **, int);
234 static bool high_pressure (machine_mode);
235 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
236 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
237 static void sh_md_init (FILE *, int, int);
238 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
240 static bool sh_function_ok_for_sibcall (tree, tree);
242 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
243 static bool sh_ms_bitfield_layout_p (const_tree);
245 static void sh_init_builtins (void);
246 static tree sh_builtin_decl (unsigned, bool);
247 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
248 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
249 HOST_WIDE_INT, tree);
250 static void sh_file_start (void);
251 static bool sh_assemble_integer (rtx, unsigned int, int);
252 static bool flow_dependent_p (rtx, rtx);
253 static void flow_dependent_p_1 (rtx, const_rtx, void *);
254 static int shiftcosts (rtx);
255 static int and_xor_ior_costs (rtx, int);
256 static int addsubcosts (rtx);
257 static int multcosts (rtx);
258 static bool unspec_caller_rtx_p (rtx);
259 static bool sh_cannot_copy_insn_p (rtx_insn *);
260 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
261 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
262 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
263 static int sh_pr_n_sets (void);
264 static rtx sh_allocate_initial_value (rtx);
265 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
266 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
267 machine_mode,
268 struct secondary_reload_info *);
269 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
270 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
271 static rtx sh_delegitimize_address (rtx);
272 static bool sh_cannot_substitute_mem_equiv_p (rtx);
273 static bool sh_legitimize_address_displacement (rtx *, rtx *,
274 poly_int64, machine_mode);
275 static int scavenge_reg (HARD_REG_SET *s);
277 static rtx sh_struct_value_rtx (tree, int);
278 static rtx sh_function_value (const_tree, const_tree, bool);
279 static bool sh_function_value_regno_p (const unsigned int);
280 static rtx sh_libcall_value (machine_mode, const_rtx);
281 static bool sh_return_in_memory (const_tree, const_tree);
282 static rtx sh_builtin_saveregs (void);
283 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
284 tree, int *, int);
285 static bool sh_strict_argument_naming (cumulative_args_t);
286 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
287 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
288 static tree sh_build_builtin_va_list (void);
289 static void sh_va_start (tree, rtx);
290 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
291 static bool sh_promote_prototypes (const_tree);
292 static machine_mode sh_promote_function_mode (const_tree type,
293 machine_mode,
294 int *punsignedp,
295 const_tree funtype,
296 int for_return);
297 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
298 const_tree, bool);
299 static bool sh_callee_copies (cumulative_args_t, machine_mode,
300 const_tree, bool);
301 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
302 tree, bool);
303 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
304 const_tree, bool);
305 static rtx sh_function_arg (cumulative_args_t, machine_mode,
306 const_tree, bool);
307 static int sh_dwarf_calling_convention (const_tree);
308 static void sh_encode_section_info (tree, rtx, int);
309 static bool sh2a_function_vector_p (tree);
310 static void sh_trampoline_init (rtx, tree, rtx);
311 static rtx sh_trampoline_adjust_address (rtx);
312 static void sh_conditional_register_usage (void);
313 static bool sh_legitimate_constant_p (machine_mode, rtx);
314 static int mov_insn_size (machine_mode, bool);
315 static int mov_insn_alignment_mask (machine_mode, bool);
316 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
317 unsigned int,
318 enum by_pieces_operation,
319 bool);
320 static bool sequence_insn_p (rtx_insn *);
321 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
322 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
323 machine_mode, bool);
324 static bool sh_legitimate_combined_insn (rtx_insn* insn);
326 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
328 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
329 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
330 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
331 static bool sh_modes_tieable_p (machine_mode, machine_mode);
332 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
334 static const struct attribute_spec sh_attribute_table[] =
336 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
337 affects_type_identity, handler, exclude } */
338 { "interrupt_handler", 0, 0, true, false, false, false,
339 sh_handle_interrupt_handler_attribute, NULL },
340 { "sp_switch", 1, 1, true, false, false, false,
341 sh_handle_sp_switch_attribute, NULL },
342 { "trap_exit", 1, 1, true, false, false, false,
343 sh_handle_trap_exit_attribute, NULL },
344 { "renesas", 0, 0, false, true, false, false,
345 sh_handle_renesas_attribute, NULL },
346 { "trapa_handler", 0, 0, true, false, false, false,
347 sh_handle_interrupt_handler_attribute, NULL },
348 { "nosave_low_regs", 0, 0, true, false, false, false,
349 sh_handle_interrupt_handler_attribute, NULL },
350 { "resbank", 0, 0, true, false, false, false,
351 sh_handle_resbank_handler_attribute, NULL },
352 { "function_vector", 1, 1, true, false, false, false,
353 sh2a_handle_function_vector_handler_attribute, NULL },
354 { NULL, 0, 0, false, false, false, false, NULL, NULL }
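/* Illustrative source-level uses of these attributes (editorial examples,
   function names hypothetical):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"), trap_exit (4)));

   sp_switch names a global variable holding the alternate stack address and
   trap_exit gives the trapa number used on return, matching the single
   argument each of them takes in the table above.  */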
357 /* Initialize the GCC target structure. */
358 #undef TARGET_ATTRIBUTE_TABLE
359 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
361 /* The next two are used for debug info when compiling with -gdwarf. */
362 #undef TARGET_ASM_UNALIGNED_HI_OP
363 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
364 #undef TARGET_ASM_UNALIGNED_SI_OP
365 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
367 #undef TARGET_OPTION_OVERRIDE
368 #define TARGET_OPTION_OVERRIDE sh_option_override
370 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
371 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
372 sh_override_options_after_change
374 #undef TARGET_PRINT_OPERAND
375 #define TARGET_PRINT_OPERAND sh_print_operand
376 #undef TARGET_PRINT_OPERAND_ADDRESS
377 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
378 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
379 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
380 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
381 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
383 #undef TARGET_ASM_FUNCTION_EPILOGUE
384 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
386 #undef TARGET_ASM_OUTPUT_MI_THUNK
387 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
389 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
390 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
391 hook_bool_const_tree_hwi_hwi_const_tree_true
393 #undef TARGET_ASM_FILE_START
394 #define TARGET_ASM_FILE_START sh_file_start
395 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
396 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
398 #undef TARGET_ASM_INTEGER
399 #define TARGET_ASM_INTEGER sh_assemble_integer
401 #undef TARGET_REGISTER_MOVE_COST
402 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
404 #undef TARGET_INSERT_ATTRIBUTES
405 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
407 #undef TARGET_SCHED_ADJUST_COST
408 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
410 #undef TARGET_SCHED_ISSUE_RATE
411 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
413 /* The following hooks have been implemented to re-enable sched1. With the
414 help of these macros we limit the movement of insns in sched1 in order to
415 reduce the register pressure. The overall idea is to keep count of the SImode
416 and SFmode regs required by already scheduled insns. When these counts
417 cross some threshold values, give priority to insns that free registers.
418 The insn that frees registers is most likely to be the insn with the lowest
419 LUID (original insn order), but such an insn might be sitting in the stalled
420 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
421 up to a max of 8 cycles so that such insns may move from Q -> R.
423 The hooks are described below:
425 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
426 scheduler; it is called inside the sched_init function just after
427 the find_insn_reg_weights call. It is used to calculate the SImode
428 and SFmode weights of the insns of basic blocks, much like what
429 find_insn_reg_weights does.
430 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
432 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
433 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
434 (Q)->(R).
436 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
437 high, reorder the ready queue so that the insn with the lowest LUID will be
438 issued next.
440 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
441 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
443 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
444 can be returned from TARGET_SCHED_REORDER2.
446 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
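/* Editorial sketch of how those hooks interact (summarizing the comment
   above, not from the original sources): sh_md_init_global computes the
   per-insn SImode/SFmode weights, sh_variable_issue caches can_issue_more,
   sh_reorder reorders the ready list so the lowest-LUID insn issues first
   once the pressure counters cross their thresholds, sh_reorder2 reports
   that pressure is still high, and sh_dfa_new_cycle then skips up to 8
   cycles so that a register-freeing insn still waiting in the stalled
   queue Q can reach the ready queue R.  */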
448 #undef TARGET_SCHED_DFA_NEW_CYCLE
449 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
451 #undef TARGET_SCHED_INIT_GLOBAL
452 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
454 #undef TARGET_SCHED_FINISH_GLOBAL
455 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
457 #undef TARGET_SCHED_VARIABLE_ISSUE
458 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
460 #undef TARGET_SCHED_REORDER
461 #define TARGET_SCHED_REORDER sh_reorder
463 #undef TARGET_SCHED_REORDER2
464 #define TARGET_SCHED_REORDER2 sh_reorder2
466 #undef TARGET_SCHED_INIT
467 #define TARGET_SCHED_INIT sh_md_init
469 #undef TARGET_DELEGITIMIZE_ADDRESS
470 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
472 #undef TARGET_LEGITIMIZE_ADDRESS
473 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
475 #undef TARGET_CAN_FOLLOW_JUMP
476 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
478 #undef TARGET_MS_BITFIELD_LAYOUT_P
479 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
481 #undef TARGET_INIT_BUILTINS
482 #define TARGET_INIT_BUILTINS sh_init_builtins
483 #undef TARGET_BUILTIN_DECL
484 #define TARGET_BUILTIN_DECL sh_builtin_decl
485 #undef TARGET_EXPAND_BUILTIN
486 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
488 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
489 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
491 #undef TARGET_CANNOT_COPY_INSN_P
492 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
493 #undef TARGET_RTX_COSTS
494 #define TARGET_RTX_COSTS sh_rtx_costs
495 #undef TARGET_ADDRESS_COST
496 #define TARGET_ADDRESS_COST sh_address_cost
497 #undef TARGET_ALLOCATE_INITIAL_VALUE
498 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
500 #undef TARGET_MACHINE_DEPENDENT_REORG
501 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
503 #undef TARGET_DWARF_REGISTER_SPAN
504 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
506 #ifdef HAVE_AS_TLS
507 #undef TARGET_HAVE_TLS
508 #define TARGET_HAVE_TLS true
509 #endif
511 #undef TARGET_PROMOTE_PROTOTYPES
512 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
513 #undef TARGET_PROMOTE_FUNCTION_MODE
514 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
516 #undef TARGET_FUNCTION_VALUE
517 #define TARGET_FUNCTION_VALUE sh_function_value
518 #undef TARGET_FUNCTION_VALUE_REGNO_P
519 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
520 #undef TARGET_LIBCALL_VALUE
521 #define TARGET_LIBCALL_VALUE sh_libcall_value
522 #undef TARGET_STRUCT_VALUE_RTX
523 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
524 #undef TARGET_RETURN_IN_MEMORY
525 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
527 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
528 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
529 #undef TARGET_SETUP_INCOMING_VARARGS
530 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
531 #undef TARGET_STRICT_ARGUMENT_NAMING
532 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
533 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
534 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
535 #undef TARGET_MUST_PASS_IN_STACK
536 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
537 #undef TARGET_PASS_BY_REFERENCE
538 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
539 #undef TARGET_CALLEE_COPIES
540 #define TARGET_CALLEE_COPIES sh_callee_copies
541 #undef TARGET_ARG_PARTIAL_BYTES
542 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
543 #undef TARGET_FUNCTION_ARG
544 #define TARGET_FUNCTION_ARG sh_function_arg
545 #undef TARGET_FUNCTION_ARG_ADVANCE
546 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
548 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
549 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
551 #undef TARGET_BUILD_BUILTIN_VA_LIST
552 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
553 #undef TARGET_EXPAND_BUILTIN_VA_START
554 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
555 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
556 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
558 #undef TARGET_VECTOR_MODE_SUPPORTED_P
559 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
561 #undef TARGET_CHECK_PCH_TARGET_FLAGS
562 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
564 #undef TARGET_DWARF_CALLING_CONVENTION
565 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
567 #undef TARGET_FRAME_POINTER_REQUIRED
568 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
570 #undef TARGET_MODE_EMIT
571 #define TARGET_MODE_EMIT sh_emit_mode_set
573 #undef TARGET_MODE_NEEDED
574 #define TARGET_MODE_NEEDED sh_mode_needed
576 #undef TARGET_MODE_AFTER
577 #define TARGET_MODE_AFTER sh_mode_after
579 #undef TARGET_MODE_ENTRY
580 #define TARGET_MODE_ENTRY sh_mode_entry
582 #undef TARGET_MODE_EXIT
583 #define TARGET_MODE_EXIT sh_mode_exit
585 #undef TARGET_MODE_PRIORITY
586 #define TARGET_MODE_PRIORITY sh_mode_priority
588 /* Return regmode weight for insn. */
589 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
590 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
592 /* Return current register pressure for regmode. */
593 #define CURR_REGMODE_PRESSURE(MODE)\
594 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
596 #undef TARGET_ENCODE_SECTION_INFO
597 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
599 #undef TARGET_LRA_P
600 #define TARGET_LRA_P sh_lra_p
602 #undef TARGET_SECONDARY_RELOAD
603 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
605 #undef TARGET_PREFERRED_RELOAD_CLASS
606 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
608 #undef TARGET_CONDITIONAL_REGISTER_USAGE
609 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
611 #undef TARGET_LEGITIMATE_ADDRESS_P
612 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
614 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
615 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
617 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
618 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
619 sh_legitimize_address_displacement
621 #undef TARGET_TRAMPOLINE_INIT
622 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
623 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
624 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
626 #undef TARGET_LEGITIMATE_CONSTANT_P
627 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
629 #undef TARGET_CANONICALIZE_COMPARISON
630 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
632 #undef TARGET_LEGITIMATE_COMBINED_INSN
633 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
635 #undef TARGET_FIXED_CONDITION_CODE_REGS
636 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
638 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
639 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
640 sh_use_by_pieces_infrastructure_p
642 /* Machine-specific symbol_ref flags. */
643 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
645 /* The tas.b instruction sets bit 7 in the byte, i.e. 0x80. This value
646 is used by optabs.c atomic op expansion code as well as in sync.md. */
647 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
648 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
650 #undef TARGET_CANNOT_FORCE_CONST_MEM
651 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
653 #undef TARGET_HARD_REGNO_NREGS
654 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
655 #undef TARGET_HARD_REGNO_MODE_OK
656 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
658 #undef TARGET_MODES_TIEABLE_P
659 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
661 #undef TARGET_CAN_CHANGE_MODE_CLASS
662 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
664 #undef TARGET_CONSTANT_ALIGNMENT
665 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
667 struct gcc_target targetm = TARGET_INITIALIZER;
670 /* Information on the currently selected atomic model.
671 This is initialized in sh_option_override. */
672 static sh_atomic_model selected_atomic_model_;
674 const sh_atomic_model&
675 selected_atomic_model (void)
677 return selected_atomic_model_;
680 static sh_atomic_model
681 parse_validate_atomic_model_option (const char* str)
683 const char* model_names[sh_atomic_model::num_models];
684 model_names[sh_atomic_model::none] = "none";
685 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
686 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
687 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
688 model_names[sh_atomic_model::soft_imask] = "soft-imask";
690 const char* model_cdef_names[sh_atomic_model::num_models];
691 model_cdef_names[sh_atomic_model::none] = "NONE";
692 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
693 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
694 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
695 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
697 sh_atomic_model ret;
698 ret.type = sh_atomic_model::none;
699 ret.name = model_names[sh_atomic_model::none];
700 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
701 ret.strict = false;
702 ret.tcb_gbr_offset = -1;
704 /* Handle empty string as 'none'. */
705 if (str == NULL || *str == '\0')
706 return ret;
708 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
710 std::vector<std::string> tokens;
711 for (std::stringstream ss (str); ss.good (); )
713 tokens.push_back (std::string ());
714 std::getline (ss, tokens.back (), ',');
717 if (tokens.empty ())
718 err_ret ("invalid atomic model option");
720 /* The first token must be the atomic model name. */
722 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
723 if (tokens.front () == model_names[i])
725 ret.type = (sh_atomic_model::enum_type)i;
726 ret.name = model_names[i];
727 ret.cdef_name = model_cdef_names[i];
728 goto got_mode_name;
731 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
732 got_mode_name:;
735 /* Go through the remaining tokens. */
736 for (size_t i = 1; i < tokens.size (); ++i)
738 if (tokens[i] == "strict")
739 ret.strict = true;
740 else if (tokens[i].find ("gbr-offset=") == 0)
742 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
743 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
744 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
745 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
746 "option", offset_str.c_str ());
748 else
749 err_ret ("unknown parameter \"%s\" in atomic model option",
750 tokens[i].c_str ());
753 /* Check that the selection makes sense. */
754 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
755 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
756 ret.name);
758 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
759 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
761 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
762 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
764 if (ret.type == sh_atomic_model::soft_tcb
765 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
766 || (ret.tcb_gbr_offset & 3) != 0))
767 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
768 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
769 ret.name);
771 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
772 err_ret ("cannot use atomic model %s in user mode", ret.name);
774 return ret;
776 #undef err_ret
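/* Illustrative -matomic-model= strings and how the parser above treats them
   (editorial examples, not from the original sources):
     "soft-gusa"               -> type = soft_gusa (needs an SH3/SH4 target)
     "soft-tcb,gbr-offset=32"  -> type = soft_tcb, tcb_gbr_offset = 32
                                  (the offset must be a multiple of 4 in 0-1020)
     "hard-llcs,strict"        -> type = hard_llcs, strict = true (SH4A only)
     ""                        -> type = none  */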
779 /* Register SH specific RTL passes. */
780 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
781 const char* name);
782 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
783 const char* name);
784 static void
785 register_sh_passes (void)
787 /* Running the sh_treg_combine pass after ce1 generates better code when
788 comparisons are combined and reg-reg moves are introduced, because
789 reg-reg moves will be eliminated afterwards. However, there are quite
790 a few cases where combine will be unable to fold comparison-related insns,
791 thus for now don't do it.
792 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
793 PASS_POS_INSERT_AFTER, "ce1", 1);
*/
796 /* Run sh_treg_combine pass after combine but before register allocation. */
797 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
798 PASS_POS_INSERT_AFTER, "split1", 1);
800 /* Run sh_treg_combine pass after register allocation and basic block
801 reordering as this sometimes creates new opportunities. */
802 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
803 PASS_POS_INSERT_AFTER, "split4", 1);
805 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
806 is known after a conditional branch.
807 This must be done after basic blocks and branch conditions have
808 stabilized and won't be changed by further passes. */
809 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
810 PASS_POS_INSERT_BEFORE, "sched2", 1);
813 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
814 various options, and do some machine dependent initialization. */
815 static void
816 sh_option_override (void)
818 int regno;
820 SUBTARGET_OVERRIDE_OPTIONS;
822 sh_cpu = PROCESSOR_SH1;
823 assembler_dialect = 0;
824 if (TARGET_SH2)
825 sh_cpu = PROCESSOR_SH2;
826 if (TARGET_SH2E)
827 sh_cpu = PROCESSOR_SH2E;
828 if (TARGET_SH2A)
829 sh_cpu = PROCESSOR_SH2A;
830 if (TARGET_SH3)
831 sh_cpu = PROCESSOR_SH3;
832 if (TARGET_SH3E)
833 sh_cpu = PROCESSOR_SH3E;
834 if (TARGET_SH4)
836 assembler_dialect = 1;
837 sh_cpu = PROCESSOR_SH4;
839 if (TARGET_SH4A)
841 assembler_dialect = 1;
842 sh_cpu = PROCESSOR_SH4A;
845 /* User/privileged mode is supported only on SH3* and SH4*.
846 Disable it for everything else. */
847 if (!TARGET_SH3 && TARGET_USERMODE)
848 TARGET_USERMODE = false;
850 if (! strcmp (sh_div_str, "call-div1"))
851 sh_div_strategy = SH_DIV_CALL_DIV1;
852 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
853 sh_div_strategy = SH_DIV_CALL_FP;
854 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
855 sh_div_strategy = SH_DIV_CALL_TABLE;
856 else
858 /* Pick one that makes most sense for the target in general.
859 It is not much good to use different functions depending on -Os,
860 since then we'll end up with two different functions when some of
861 the code is compiled for size, and some for speed. */
863 /* SH4 tends to emphasize speed. */
864 if (TARGET_HARD_SH4)
865 sh_div_strategy = SH_DIV_CALL_TABLE;
866 /* These have their own way of doing things. */
867 else if (TARGET_SH2A)
868 sh_div_strategy = SH_DIV_INTRINSIC;
869 /* SH1 .. SH3 cores often go into small-footprint systems, so
870 default to the smallest implementation available. */
871 else
872 sh_div_strategy = SH_DIV_CALL_DIV1;
875 if (sh_divsi3_libfunc[0])
876 ; /* User supplied - leave it alone. */
877 else if (TARGET_DIVIDE_CALL_FP)
878 sh_divsi3_libfunc = "__sdivsi3_i4";
879 else if (TARGET_DIVIDE_CALL_TABLE)
880 sh_divsi3_libfunc = "__sdivsi3_i4i";
881 else
882 sh_divsi3_libfunc = "__sdivsi3";
884 if (sh_branch_cost == -1)
886 /* The SH1 does not have delay slots, hence we get a pipeline stall
887 at every branch. The SH4 is superscalar, so the single delay slot
888 is not sufficient to keep both pipelines filled.
889 In any case, set the default branch cost to '2', as it results in
890 slightly smaller code overall and also enables some if-conversions
891 that are required for matching special T bit related insns. */
892 sh_branch_cost = 2;
895 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
896 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
897 TARGET_ZDCBRANCH = 1;
899 /* FDPIC code is a special form of PIC, and the vast majority of code
900 generation constraints that apply to PIC also apply to FDPIC, so we
901 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
902 flag_pic is checked. */
903 if (TARGET_FDPIC && !flag_pic)
904 flag_pic = 2;
906 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
907 if (! VALID_REGISTER_P (regno))
908 sh_register_names[regno][0] = '\0';
910 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
911 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
912 sh_additional_register_names[regno][0] = '\0';
914 if (flag_pic && ! TARGET_PREFERGOT)
915 flag_no_function_cse = 1;
917 if (targetm.small_register_classes_for_mode_p (VOIDmode))
919 /* Never run scheduling before reload, since that can
920 break global alloc, and generates slower code anyway due
921 to the pressure on R0. */
922 /* Enable sched1 for SH4 only if the user explicitly requests it.
923 When sched1 is enabled, the ready queue will be reordered by
924 the target hooks if pressure is high. We cannot do this for
925 PIC, or for SH3 and lower, as they give spill failures for R0. */
926 if (!TARGET_HARD_SH4 || flag_pic)
927 flag_schedule_insns = 0;
928 /* ??? Current exception handling places basic block boundaries
929 after call_insns. It causes high pressure on R0 and gives
930 spill failures for R0 in reload. See PR 22553 and the thread
931 on gcc-patches
932 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
933 else if (flag_exceptions)
935 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
936 warning (0, "ignoring -fschedule-insns because of exception "
937 "handling bug");
938 flag_schedule_insns = 0;
940 else if (flag_schedule_insns
941 && !global_options_set.x_flag_schedule_insns)
942 flag_schedule_insns = 0;
945 /* Unwind info is not correct around the CFG unless either a frame
946 pointer is present or M_A_O_A is set. Fixing this requires rewriting
947 unwind info generation to be aware of the CFG and propagating states
948 around edges. */
949 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
950 || flag_exceptions || flag_non_call_exceptions)
951 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
953 warning (0, "unwind tables currently require either a frame pointer "
954 "or -maccumulate-outgoing-args for correctness");
955 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
958 if (flag_unsafe_math_optimizations)
960 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
961 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
962 TARGET_FSCA = 1;
964 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
965 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
966 TARGET_FSRRA = 1;
969 /* Allow fsrra insn only if -funsafe-math-optimizations and
970 -ffinite-math-only are enabled. */
971 TARGET_FSRRA = TARGET_FSRRA
972 && flag_unsafe_math_optimizations
973 && flag_finite_math_only;
975 /* If the -mieee option was not explicitly set by the user, turn it on
976 unless -ffinite-math-only was specified. See also PR 33135. */
977 if (! global_options_set.x_TARGET_IEEE)
978 TARGET_IEEE = ! flag_finite_math_only;
980 if (sh_fixed_range_str)
981 sh_fix_range (sh_fixed_range_str);
983 /* This target defaults to strict volatile bitfields. */
984 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
985 flag_strict_volatile_bitfields = 1;
987 sh_override_options_after_change ();
989 /* Parse atomic model option and make sure it is valid for the current
990 target CPU. */
991 selected_atomic_model_
992 = parse_validate_atomic_model_option (sh_atomic_model_str);
994 register_sh_passes ();
997 /* Implement targetm.override_options_after_change. */
999 static void
1000 sh_override_options_after_change (void)
1002 /* Adjust loop, jump and function alignment values (in bytes), if those
1003 were not specified by the user using -falign-loops, -falign-jumps
1004 and -falign-functions options.
1005 32 bit alignment is better for speed, because instructions can be
1006 fetched as a pair from a longword boundary. For size use 16 bit
1007 alignment to get more compact code.
1008 Aligning all jumps increases the code size, even if it might
1009 result in slightly faster code. Thus, it is set to the smallest
1010 alignment possible if not specified by the user. */
1011 if (flag_align_loops && !str_align_loops)
1012 str_align_loops = optimize_size ? "2" : "4";
1014 /* Parse values so that we can compare for current value. */
1015 parse_alignment_opts ();
1016 if (flag_align_jumps && !str_align_jumps)
1017 str_align_jumps = "2";
1018 else if (align_jumps_value < 2)
1019 str_align_jumps = "2";
1021 if (flag_align_functions && !str_align_functions)
1022 str_align_functions = optimize_size ? "2" : "4";
1024 /* The linker relaxation code breaks when a function contains
1025 alignments that are larger than the alignment at the start of a
1026 compilation unit. */
1027 if (TARGET_RELAX)
1029 /* Parse values so that we can compare for current value. */
1030 parse_alignment_opts ();
1031 int min_align = MAX (align_loops_value, align_jumps_value);
1033 /* Also take possible .long constants / mova tables into account. */
1034 if (min_align < 4)
1035 min_align = 4;
1036 if (align_functions_value < min_align)
1038 char *r = XNEWVEC (char, 16);
1039 sprintf (r, "%d", min_align);
1040 str_align_functions = r;
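/* Editorial note on the resulting defaults (derived from the code above):
   with -Os the loop and function alignment strings become "2", otherwise
   "4"; jump alignment defaults to "2" and is never left below 2; and under
   -mrelax the function alignment is raised to at least
   MAX (align_loops, align_jumps, 4).  */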
1045 /* Print the operand address in x to the stream. */
1046 static void
1047 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1049 switch (GET_CODE (x))
1051 case REG:
1052 case SUBREG:
1053 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1054 break;
1056 case PLUS:
1058 rtx base = XEXP (x, 0);
1059 rtx index = XEXP (x, 1);
1061 switch (GET_CODE (index))
1063 case CONST_INT:
1064 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1065 reg_names[true_regnum (base)]);
1066 break;
1068 case REG:
1069 case SUBREG:
1071 int base_num = true_regnum (base);
1072 int index_num = true_regnum (index);
1074 /* If base or index is R0, make sure that it comes first.
1075 Usually one of them will be R0, but the order might be wrong.
1076 If neither base nor index is R0, it's an error and we just
1077 pass it on to the assembler. This avoids silent wrong-code
1078 bugs. */
1079 if (base_num == 0 && index_num != 0)
1080 std::swap (base_num, index_num);
1082 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1083 reg_names[base_num]);
1084 break;
1087 default:
1088 gcc_unreachable ();
1091 break;
1093 case PRE_DEC:
1094 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1095 break;
1097 case POST_INC:
1098 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1099 break;
1101 default:
1102 x = mark_constant_pool_use (x);
1103 output_addr_const (stream, x);
1104 break;
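/* Editorial examples of the address forms printed above (illustrative
   register numbers, not from the original sources): a plain REG prints as
   "@r4", REG+CONST_INT as "@(8,r4)", REG+REG as "@(r0,r5)" with r0 always
   printed first, PRE_DEC as "@-r15" and POST_INC as "@r4+".  */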
1108 /* Print operand x (an rtx) in assembler syntax to file stream
1109 according to modifier code.
1111 '.' print a .s if insn needs delay slot
1112 ',' print LOCAL_LABEL_PREFIX
1113 '@' print trap, rte or rts depending upon pragma interruptness
1114 '#' output a nop if there is nothing to put in the delay slot
1115 ''' print likelihood suffix (/u for unlikely).
1116 '>' print branch target if -fverbose-asm
1117 'O' print a constant without the #
1118 'R' print the LSW of a dp value - changes if in little endian
1119 'S' print the MSW of a dp value - changes if in little endian
1120 'T' print the next word of a dp value - same as 'R' in big endian mode.
1121 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1122 'N' print 'r63' if the operand is (const_int 0).
1123 'd' print a V2SF reg as dN instead of fpN.
1124 'm' print a pair `base,offset' or `base,index', for LD and ST.
1125 'U' Likewise for {LD,ST}{HI,LO}.
1126 'V' print the position of a single bit set.
1127 'W' print the position of a single bit cleared.
1128 't' print a memory address which is a register.
1129 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1130 'o' output an operator. */
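/* A few editorial examples of those codes (illustrative operands, not from
   the original sources): for a QImode MEM, '%M' prints ".b"; for
   (const_int 32), '%V' prints "#5" (the position of the set bit); for
   (mem (plus (reg r4) (const_int 4))), '%m' prints "r4, 4"; and '%#'
   emits "\n\tnop" when the delay slot is empty.  */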
1131 static void
1132 sh_print_operand (FILE *stream, rtx x, int code)
1134 int regno;
1135 machine_mode mode;
1137 switch (code)
1139 tree trapa_attr;
1141 case '.':
1142 if (final_sequence
1143 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1144 && get_attr_length (final_sequence->insn (1)))
1145 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1146 break;
1147 case ',':
1148 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1149 break;
1150 case '@':
1151 trapa_attr = lookup_attribute ("trap_exit",
1152 DECL_ATTRIBUTES (current_function_decl));
1153 if (trapa_attr)
1154 fprintf (stream, "trapa #%ld",
1155 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1156 else if (sh_cfun_interrupt_handler_p ())
1158 if (sh_cfun_resbank_handler_p ())
1159 fprintf (stream, "resbank\n");
1160 fprintf (stream, "rte");
1162 else
1163 fprintf (stream, "rts");
1164 break;
1165 case '#':
1166 /* Output a nop if there's nothing in the delay slot. */
1167 if (dbr_sequence_length () == 0)
1168 fprintf (stream, "\n\tnop");
1169 break;
1170 case '\'':
1172 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1174 if (note
1175 && profile_probability::from_reg_br_prob_note (XINT (note, 0))
1176 < profile_probability::even ())
1177 fputs ("/u", stream);
1178 break;
1180 case '>':
1181 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1183 fputs ("\t! target: ", stream);
1184 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1186 break;
1187 case 'O':
1188 x = mark_constant_pool_use (x);
1189 output_addr_const (stream, x);
1190 break;
1191 /* N.B.: %R / %S / %T adjust memory addresses by four.
1192 While they can be used to access 64 bit parts of a larger value
1193 held in general purpose registers, that won't work with memory,
1194 nor for fp registers, since the frxx names are used. */
1195 case 'R':
1196 if (REG_P (x) || GET_CODE (x) == SUBREG)
1198 regno = true_regnum (x);
1199 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1200 fputs (reg_names[regno], (stream));
1202 else if (MEM_P (x))
1204 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1205 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1207 else
1209 rtx sub = NULL_RTX;
1211 mode = GET_MODE (x);
1212 if (mode == VOIDmode)
1213 mode = DImode;
1214 if (GET_MODE_SIZE (mode) >= 8)
1215 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1216 if (sub)
1217 sh_print_operand (stream, sub, 0);
1218 else
1219 output_operand_lossage ("invalid operand to %%R");
1221 break;
1222 case 'S':
1223 if (REG_P (x) || GET_CODE (x) == SUBREG)
1225 regno = true_regnum (x);
1226 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1227 fputs (reg_names[regno], (stream));
1229 else if (MEM_P (x))
1231 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1232 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1234 else
1236 rtx sub = NULL_RTX;
1238 mode = GET_MODE (x);
1239 if (mode == VOIDmode)
1240 mode = DImode;
1241 if (GET_MODE_SIZE (mode) >= 8)
1242 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1243 if (sub)
1244 sh_print_operand (stream, sub, 0);
1245 else
1246 output_operand_lossage ("invalid operand to %%S");
1248 break;
1249 case 'T':
1250 /* Next word of a double. */
1251 switch (GET_CODE (x))
1253 case REG:
1254 fputs (reg_names[REGNO (x) + 1], (stream));
1255 break;
1256 case MEM:
1258 machine_mode mode = GET_MODE (x);
1259 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1260 && GET_CODE (XEXP (x, 0)) != POST_INC)
1261 x = adjust_address (x, SImode, 4);
1262 sh_print_operand_address (stream, mode, XEXP (x, 0));
1264 break;
1265 default:
1266 break;
1268 break;
1270 case 't':
1271 gcc_assert (MEM_P (x));
1272 x = XEXP (x, 0);
1273 switch (GET_CODE (x))
1275 case REG:
1276 case SUBREG:
1277 sh_print_operand (stream, x, 0);
1278 break;
1279 default:
1280 break;
1282 break;
1284 case 'o':
1285 switch (GET_CODE (x))
1287 case PLUS: fputs ("add", stream); break;
1288 case MINUS: fputs ("sub", stream); break;
1289 case MULT: fputs ("mul", stream); break;
1290 case DIV: fputs ("div", stream); break;
1291 case EQ: fputs ("eq", stream); break;
1292 case NE: fputs ("ne", stream); break;
1293 case GT: case LT: fputs ("gt", stream); break;
1294 case GE: case LE: fputs ("ge", stream); break;
1295 case GTU: case LTU: fputs ("gtu", stream); break;
1296 case GEU: case LEU: fputs ("geu", stream); break;
1297 default:
1298 break;
1300 break;
1301 case 'M':
1302 if (MEM_P (x))
1304 switch (GET_MODE (x))
1306 case E_QImode: fputs (".b", stream); break;
1307 case E_HImode: fputs (".w", stream); break;
1308 case E_SImode: fputs (".l", stream); break;
1309 case E_SFmode: fputs (".s", stream); break;
1310 case E_DFmode: fputs (".d", stream); break;
1311 default: gcc_unreachable ();
1314 break;
1316 case 'm':
1317 gcc_assert (MEM_P (x));
1318 x = XEXP (x, 0);
1319 /* Fall through. */
1320 case 'U':
1321 switch (GET_CODE (x))
1323 case REG:
1324 case SUBREG:
1325 sh_print_operand (stream, x, 0);
1326 fputs (", 0", stream);
1327 break;
1329 case PLUS:
1330 sh_print_operand (stream, XEXP (x, 0), 0);
1331 fputs (", ", stream);
1332 sh_print_operand (stream, XEXP (x, 1), 0);
1333 break;
1335 default:
1336 gcc_unreachable ();
1338 break;
1340 case 'V':
1342 int num = exact_log2 (INTVAL (x));
1343 gcc_assert (num >= 0);
1344 fprintf (stream, "#%d", num);
1346 break;
1348 case 'W':
1350 int num = exact_log2 (~INTVAL (x));
1351 gcc_assert (num >= 0);
1352 fprintf (stream, "#%d", num);
1354 break;
1356 case 'd':
1357 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1359 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1360 break;
1362 case 'N':
1363 if (x == CONST0_RTX (GET_MODE (x)))
1365 fprintf ((stream), "r63");
1366 break;
1368 goto default_output;
1369 case 'u':
1370 if (CONST_INT_P (x))
1372 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1373 break;
1375 /* Fall through. */
1377 default_output:
1378 default:
1379 regno = 0;
1380 mode = GET_MODE (x);
1382 switch (GET_CODE (x))
1384 case TRUNCATE:
1386 rtx inner = XEXP (x, 0);
1387 int offset = 0;
1388 machine_mode inner_mode;
1390 /* We might see SUBREGs with vector mode registers inside. */
1391 if (GET_CODE (inner) == SUBREG
1392 && (GET_MODE_SIZE (GET_MODE (inner))
1393 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1394 && subreg_lowpart_p (inner))
1395 inner = SUBREG_REG (inner);
1396 if (CONST_INT_P (inner))
1398 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1399 goto default_output;
1401 inner_mode = GET_MODE (inner);
1402 if (GET_CODE (inner) == SUBREG
1403 && (GET_MODE_SIZE (GET_MODE (inner))
1404 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1405 && REG_P (SUBREG_REG (inner)))
1407 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1408 GET_MODE (SUBREG_REG (inner)),
1409 SUBREG_BYTE (inner),
1410 GET_MODE (inner));
1411 inner = SUBREG_REG (inner);
1413 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1414 abort ();
1415 /* Floating point register pairs are always big endian;
1416 general purpose registers are 64 bit wide. */
1417 regno = REGNO (inner);
1418 regno = (hard_regno_nregs (regno, inner_mode)
1419 - hard_regno_nregs (regno, mode))
1420 + offset;
1421 x = inner;
1422 goto reg;
1424 case SIGN_EXTEND:
1425 x = XEXP (x, 0);
1426 goto reg;
1427 case SUBREG:
1428 gcc_assert (SUBREG_BYTE (x) == 0
1429 && REG_P (SUBREG_REG (x)));
1431 x = SUBREG_REG (x);
1432 /* Fall through. */
1434 reg:
1435 case REG:
1436 regno += REGNO (x);
1437 if (FP_REGISTER_P (regno)
1438 && mode == V16SFmode)
1439 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1440 else if (FP_REGISTER_P (REGNO (x))
1441 && mode == V4SFmode)
1442 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1443 else if (REG_P (x)
1444 && mode == V2SFmode)
1445 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1446 else if (FP_REGISTER_P (REGNO (x))
1447 && GET_MODE_SIZE (mode) > 4)
1448 fprintf ((stream), "d%s", reg_names[regno] + 1);
1449 else
1450 fputs (reg_names[regno], (stream));
1451 break;
1453 case MEM:
1454 output_address (GET_MODE (x), XEXP (x, 0));
1455 break;
1457 default:
1458 fputc ('#', stream);
1459 output_addr_const (stream, x);
1460 break;
1462 break;
1466 static bool
1467 sh_print_operand_punct_valid_p (unsigned char code)
1469 return (code == '.' || code == '#' || code == '@' || code == ','
1470 || code == '$' || code == '\'' || code == '>');
1473 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1474 static bool
1475 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1477 if (GET_CODE (x) == UNSPEC)
1479 switch (XINT (x, 1))
1481 case UNSPEC_PIC:
1482 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1483 output_addr_const (file, XVECEXP (x, 0, 0));
1484 break;
1485 case UNSPEC_GOT:
1486 output_addr_const (file, XVECEXP (x, 0, 0));
1487 fputs ("@GOT", file);
1488 break;
1489 case UNSPEC_GOTOFF:
1490 output_addr_const (file, XVECEXP (x, 0, 0));
1491 fputs ("@GOTOFF", file);
1492 break;
1493 case UNSPEC_PLT:
1494 output_addr_const (file, XVECEXP (x, 0, 0));
1495 fputs ("@PLT", file);
1496 break;
1497 case UNSPEC_GOTPLT:
1498 output_addr_const (file, XVECEXP (x, 0, 0));
1499 fputs ("@GOTPLT", file);
1500 break;
1501 case UNSPEC_PCREL:
1502 output_addr_const (file, XVECEXP (x, 0, 0));
1503 fputs ("@PCREL", file);
1504 break;
1505 case UNSPEC_DTPOFF:
1506 output_addr_const (file, XVECEXP (x, 0, 0));
1507 fputs ("@DTPOFF", file);
1508 break;
1509 case UNSPEC_GOTTPOFF:
1510 output_addr_const (file, XVECEXP (x, 0, 0));
1511 fputs ("@GOTTPOFF", file);
1512 break;
1513 case UNSPEC_TPOFF:
1514 output_addr_const (file, XVECEXP (x, 0, 0));
1515 fputs ("@TPOFF", file);
1516 break;
1517 case UNSPEC_CALLER:
1519 char name[32];
1520 /* LPCS stands for Label for PIC Call Site. */
1521 targetm.asm_out.generate_internal_label (name, "LPCS",
1522 INTVAL (XVECEXP (x, 0, 0)));
1523 assemble_name (file, name);
1525 break;
1526 case UNSPEC_SYMOFF:
1527 output_addr_const (file, XVECEXP (x, 0, 0));
1528 fputc ('-', file);
1529 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1531 fputc ('(', file);
1532 output_addr_const (file, XVECEXP (x, 0, 1));
1533 fputc (')', file);
1535 else
1536 output_addr_const (file, XVECEXP (x, 0, 1));
1537 break;
1538 case UNSPEC_PCREL_SYMOFF:
1539 output_addr_const (file, XVECEXP (x, 0, 0));
1540 fputs ("-(", file);
1541 output_addr_const (file, XVECEXP (x, 0, 1));
1542 fputs ("-.)", file);
1543 break;
1544 case UNSPEC_GOTFUNCDESC:
1545 output_addr_const (file, XVECEXP (x, 0, 0));
1546 fputs ("@GOTFUNCDESC", file);
1547 break;
1548 case UNSPEC_GOTOFFFUNCDESC:
1549 output_addr_const (file, XVECEXP (x, 0, 0));
1550 fputs ("@GOTOFFFUNCDESC", file);
1551 break;
1552 default:
1553 return false;
1555 return true;
1557 else
1558 return false;
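/* Editorial examples of the suffixes emitted above (illustrative symbol
   names): an UNSPEC_GOT wrapping "foo" prints as "foo@GOT", UNSPEC_GOTOFF
   as "foo@GOTOFF", UNSPEC_TPOFF as "foo@TPOFF", and UNSPEC_PCREL_SYMOFF
   prints "sym-(other-.)" for its two operands.  */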
1561 /* Encode symbol attributes of a SYMBOL_REF into its
1562 SYMBOL_REF_FLAGS. */
1563 static void
1564 sh_encode_section_info (tree decl, rtx rtl, int first)
1566 default_encode_section_info (decl, rtl, first);
1568 if (TREE_CODE (decl) == FUNCTION_DECL
1569 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1570 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1573 /* Prepare operands for a move define_expand; specifically, one of the
1574 operands must be in a register. */
1575 void
1576 prepare_move_operands (rtx operands[], machine_mode mode)
1578 if ((mode == SImode || mode == DImode)
1579 && flag_pic
1580 && ! ((mode == Pmode || mode == ptr_mode)
1581 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1583 rtx temp;
1584 if (SYMBOLIC_CONST_P (operands[1]))
1586 if (MEM_P (operands[0]))
1587 operands[1] = force_reg (Pmode, operands[1]);
1588 else
1590 temp = (!can_create_pseudo_p ()
1591 ? operands[0]
1592 : gen_reg_rtx (Pmode));
1593 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1596 else if (GET_CODE (operands[1]) == CONST
1597 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1598 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1600 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1601 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1602 mode, temp);
1603 operands[1] = expand_binop (mode, add_optab, temp,
1604 XEXP (XEXP (operands[1], 0), 1),
1605 (!can_create_pseudo_p ()
1606 ? temp
1607 : gen_reg_rtx (Pmode)),
1608 0, OPTAB_LIB_WIDEN);
1612 if (! reload_in_progress && ! reload_completed)
1614 /* Copy the source to a register if neither operand is a register. */
1615 if (! register_operand (operands[0], mode)
1616 && ! register_operand (operands[1], mode))
1617 operands[1] = copy_to_mode_reg (mode, operands[1]);
1619 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1621 /* This is like change_address_1 (operands[0], mode, 0, 1),
1622 except that we can't use that function because it is static. */
1623 rtx new_rtx = change_address (operands[0], mode, 0);
1624 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1625 operands[0] = new_rtx;
1628 /* This case can happen while generating code to move the result
1629 of a library call to the target. Reject `st r0,@(rX,rY)' because
1630 reload will fail to find a spill register for rX, since r0 is already
1631 being used for the source. */
1632 else if (refers_to_regno_p (R0_REG, operands[1])
1633 && MEM_P (operands[0])
1634 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1635 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1636 operands[1] = copy_to_mode_reg (mode, operands[1]);
1638 /* When displacement addressing is used, RA will assign r0 to
1639 the pseudo register operand for the QI/HImode load/store.
1640 This tends to make a long live range for R0 and might cause
1641 anomalous register spills in some cases with LRA. See PR
1642 target/55212.
1643 We split such a load/store into two move insns via r0 so as to
1644 shorten R0's live range. It makes some code worse but wins
1645 on average for LRA.
1646 Also, when base+index addressing is used and the index term is
1647 a subreg, LRA assumes that more hard registers can be available
1648 in some situations. That isn't the case for SH in the problematic
1649 case. We can pre-allocate R0 for that index term to avoid
1650 the issue. See PR target/66591. */
1651 else if (sh_lra_p ()
1652 && ! TARGET_SH2A
1653 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1654 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1656 bool load_p = REG_P (operands[0]);
1657 rtx reg = operands[load_p ? 0 : 1];
1658 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1660 if ((mode == QImode || mode == HImode)
1661 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1662 && GET_CODE (adr) == PLUS
1663 && REG_P (XEXP (adr, 0))
1664 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1665 && CONST_INT_P (XEXP (adr, 1))
1666 && INTVAL (XEXP (adr, 1)) != 0
1667 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1669 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1670 emit_move_insn (r0_rtx, operands[1]);
1671 operands[1] = r0_rtx;
1673 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1674 && GET_CODE (adr) == PLUS
1675 && REG_P (XEXP (adr, 0))
1676 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1677 && SUBREG_P (XEXP (adr, 1))
1678 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1680 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1681 emit_move_insn (r0_rtx, XEXP (adr, 1));
1682 XEXP (adr, 1) = r0_rtx;
1687 if (mode == Pmode || mode == ptr_mode)
1689 rtx op0 = operands[0];
1690 rtx op1 = operands[1];
1691 rtx opc;
1692 if (GET_CODE (op1) == CONST
1693 && GET_CODE (XEXP (op1, 0)) == PLUS
1694 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1695 != TLS_MODEL_NONE))
1697 opc = XEXP (XEXP (op1, 0), 1);
1698 op1 = XEXP (XEXP (op1, 0), 0);
1700 else
1701 opc = NULL_RTX;
1703 enum tls_model tls_kind;
1705 if (! reload_in_progress && ! reload_completed
1706 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1708 rtx tga_op1, tga_ret, tmp, tmp2;
1710 if (! flag_pic
1711 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1712 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1713 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1715 static int got_labelno;
1716 /* Don't schedule insns for getting the GOT address when
1717 the first scheduling pass is enabled, to avoid spill
1718 failures for R0. */
1719 if (flag_schedule_insns)
1720 emit_insn (gen_blockage ());
1721 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1722 emit_use (gen_rtx_REG (SImode, PIC_REG));
1723 if (flag_schedule_insns)
1724 emit_insn (gen_blockage ());
1727 switch (tls_kind)
1729 case TLS_MODEL_GLOBAL_DYNAMIC:
1730 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1731 if (TARGET_FDPIC)
1732 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1733 sh_get_fdpic_reg_initial_val ());
1734 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1735 tmp = gen_reg_rtx (Pmode);
1736 emit_move_insn (tmp, tga_ret);
1737 op1 = tmp;
1738 break;
1740 case TLS_MODEL_LOCAL_DYNAMIC:
1741 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1742 if (TARGET_FDPIC)
1743 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1744 sh_get_fdpic_reg_initial_val ());
1745 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1747 tmp = gen_reg_rtx (Pmode);
1748 emit_move_insn (tmp, tga_ret);
1750 if (register_operand (op0, Pmode))
1751 tmp2 = op0;
1752 else
1753 tmp2 = gen_reg_rtx (Pmode);
1755 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1756 op1 = tmp2;
1757 break;
1759 case TLS_MODEL_INITIAL_EXEC:
1760 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1761 tmp = gen_sym2GOTTPOFF (op1);
1762 if (TARGET_FDPIC)
1763 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1764 sh_get_fdpic_reg_initial_val ());
1765 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1766 op1 = tga_op1;
1767 break;
1769 case TLS_MODEL_LOCAL_EXEC:
1770 tmp2 = gen_reg_rtx (Pmode);
1771 emit_insn (gen_store_gbr (tmp2));
1772 tmp = gen_reg_rtx (Pmode);
1773 emit_insn (gen_symTPOFF2reg (tmp, op1));
1775 if (register_operand (op0, Pmode))
1776 op1 = op0;
1777 else
1778 op1 = gen_reg_rtx (Pmode);
1780 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1781 break;
1783 default:
1784 gcc_unreachable ();
1786 if (opc)
1787 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1788 operands[1] = op1;
1792 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1794 rtx base, offset;
1795 split_const (operands[1], &base, &offset);
1797 if (GET_CODE (base) == SYMBOL_REF
1798 && !offset_within_block_p (base, INTVAL (offset)))
1800 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1801 emit_move_insn (tmp, base);
1802 if (!arith_operand (offset, mode))
1803 offset = force_reg (mode, offset);
1804 emit_insn (gen_add3_insn (operands[0], tmp, offset));
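/* Illustrative sketch (added for clarity; "arr" is a made-up symbol): with
   SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P, an address such as &arr[1000] for a
   small array "arr" carries an offset (4000) that is not known to stay
   within arr's object, so the code above first loads the bare symbol into a
   temporary and then adds the offset with a separate insn, rather than
   emitting the symbol+offset constant directly.  */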
1809 /* Implement the canonicalize_comparison target hook for the combine
1810 pass. For the target hook this function is invoked via
1811 sh_canonicalize_comparison. This function is also re-used to
1812 canonicalize comparisons in cbranch pattern expanders. */
1813 static void
1814 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1815 machine_mode mode,
1816 bool op0_preserve_value)
1818 /* When invoked from within the combine pass the mode is not specified,
1819 so try to get it from one of the operands. */
1820 if (mode == VOIDmode)
1821 mode = GET_MODE (op0);
1822 if (mode == VOIDmode)
1823 mode = GET_MODE (op1);
1825 // We need to have a mode to do something useful here.
1826 if (mode == VOIDmode)
1827 return;
1829 // Currently, we don't deal with floats here.
1830 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1831 return;
1833 // Make sure that the constant operand is the second operand.
1834 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1836 if (op0_preserve_value)
1837 return;
1839 std::swap (op0, op1);
1840 cmp = swap_condition (cmp);
1843 if (CONST_INT_P (op1))
1845 /* Try to adjust the constant operand in such a way that available
1846 comparison insns can be utilized better and the constant can be
1847 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1848 constant pool. */
1849 const HOST_WIDE_INT val = INTVAL (op1);
1851 /* x > -1 --> x >= 0
1852 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1853 x <= -1 --> x < 0
1854 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1855 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1857 cmp = cmp == GT ? GE : LT;
1858 op1 = gen_int_mode (val + 1, mode);
1861 /* x >= 1 --> x > 0
1862 x >= 0x80 --> x > 0x7F
1863 x < 1 --> x <= 0
1864 x < 0x80 --> x <= 0x7F */
1865 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1867 cmp = cmp == GE ? GT : LE;
1868 op1 = gen_int_mode (val - 1, mode);
1871 /* unsigned x >= 1 --> x != 0
1872 unsigned x < 1 --> x == 0 */
1873 else if (val == 1 && (cmp == GEU || cmp == LTU))
1875 cmp = cmp == GEU ? NE : EQ;
1876 op1 = CONST0_RTX (mode);
1879 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1880 unsigned x < 0x80 --> unsigned x < 0x7F */
1881 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1883 cmp = cmp == GEU ? GTU : LEU;
1884 op1 = gen_int_mode (val - 1, mode);
1887 /* unsigned x > 0 --> x != 0
1888 unsigned x <= 0 --> x == 0 */
1889 else if (val == 0 && (cmp == GTU || cmp == LEU))
1890 cmp = cmp == GTU ? NE : EQ;
1892 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1893 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1894 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1895 && val == 0x7FFFFFFF)
1897 cmp = cmp == GTU ? LT : GE;
1898 op1 = const0_rtx;
1901 /* unsigned x >= 0x80000000 --> signed x < 0
1902 unsigned x < 0x80000000 --> signed x >= 0 */
1903 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1904 && (unsigned HOST_WIDE_INT)val
1905 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1907 cmp = cmp == GEU ? LT : GE;
1908 op1 = const0_rtx;
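/* Illustrative example (added for clarity): for "if (x > -1)" the code
   above rewrites GT against -1 into GE against 0, which on SH can
   typically be tested with a single "cmp/pz Rm" insn instead of loading
   the constant into a register and using cmp/gt.  Likewise "if (x >= 1)"
   becomes GT against 0 and maps to "cmp/pl Rm".  */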
1913 /* This function implements the canonicalize_comparison target hook.
1914 This wrapper around the internally used sh_canonicalize_comparison
1915 function is needed to do the enum rtx_code <-> int conversion.
1916 Target hooks cannot use enum rtx_code in their definitions. */
1917 static void
1918 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1919 bool op0_preserve_value)
1921 enum rtx_code tmp_code = (enum rtx_code)*code;
1922 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1923 VOIDmode, op0_preserve_value);
1924 *code = (int)tmp_code;
1927 /* This function implements the legitimate_combined_insn target hook,
1928 which the combine pass uses to early reject combined insns, before
1929 it tries to recog the insn and determine its cost. */
1930 static bool
1931 sh_legitimate_combined_insn (rtx_insn* insn)
1933 /* Reject combinations of memory loads and zero extensions, as these
1934 interfere with other combine patterns such as zero extracts and bit
1935 tests. The SH2A movu.{b|w} insns are formed later in the
1936 'sh_optimize_extu_exts' pass after combine/split1. */
1937 rtx p = PATTERN (insn);
1938 if (GET_CODE (p) == SET
1939 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1940 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1941 && MEM_P (XEXP (XEXP (p, 1), 0)))
1942 return false;
1944 return true;
1947 bool
1948 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1950 *p1 = T_REG;
1951 *p2 = INVALID_REGNUM;
1952 return true;
1955 /* Try to calculate the branch distance of a conditional branch in bytes.
1957 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1958 walk from this insn into the next (fall-through) basic block and see if
1959 we hit the label. */
1960 unsigned int
1961 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1963 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1965 if (dump_file)
1967 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1968 print_rtl_single (dump_file, cbranch_insn);
1971 unsigned int dist = 0;
1973 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1974 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1976 const unsigned int i_len = get_attr_length (i);
1977 dist += i_len;
1979 if (dump_file)
1980 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1981 INSN_UID (i), i_len, dist);
1983 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1985 if (l == cbranch_insn->jump_target ())
1987 if (dump_file)
1988 fprintf (dump_file, " cbranch dist = %u\n", dist);
1989 return dist;
1991 break;
1995 if (dump_file)
1996 fprintf (dump_file, " cbranch dist = unknown\n");
1998 return unknown_cbranch_distance;
2001 enum rtx_code
2002 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2003 enum rtx_code comparison)
2005 gcc_assert (can_create_pseudo_p ());
2007 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2008 comparison = GET_CODE (operands[0]);
2010 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2011 mode, false);
2013 rtx op1 = operands[1];
2014 operands[1] = force_reg (mode, op1);
2016 /* When we are handling DImode comparisons, we want to keep constants so
2017 that we can optimize the component comparisons; however, memory loads
2018 are better issued as a whole so that they can be scheduled well.
2019 SImode equality comparisons allow I08 constants, but only when they
2020 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2021 into a register, that register might as well be r0, and we allow the
2022 constant. If it is already in a register, this is likely to be
2023 allocated to a different hard register, thus we load the constant into
2024 a register unless it is zero. */
2025 if (!REG_P (operands[2])
2026 && (!CONST_INT_P (operands[2])
2027 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2028 && ((comparison != EQ && comparison != NE)
2029 || (REG_P (op1) && REGNO (op1) != R0_REG)
2030 || !satisfies_constraint_I08 (operands[2])))))
2031 operands[2] = force_reg (mode, operands[2]);
2033 return comparison;
2036 static void
2037 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2038 profile_probability probability)
2040 rtx (*branch_expander) (rtx) = gen_branch_true;
2041 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2042 switch (comparison)
2044 case NE: case LT: case LE: case LTU: case LEU:
2045 comparison = reverse_condition (comparison);
2046 branch_expander = gen_branch_false;
2047 default: ;
2049 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2050 gen_rtx_fmt_ee (comparison, SImode,
2051 operands[1], operands[2])));
2052 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2053 if (probability.initialized_p ())
2054 add_reg_br_prob_note (jump, probability);
2057 void
2058 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2060 expand_cbranchsi4 (operands, comparison,
2061 profile_probability::uninitialized ());
2064 /* ??? How should we distribute probabilities when more than one branch
2065 is generated? So far we only have some ad-hoc observations:
2066 - If the operands are random, they are likely to differ in both parts.
2067 - If comparing items in a hash chain, the operands are random or equal;
2068 operation should be EQ or NE.
2069 - If items are searched in an ordered tree from the root, we can expect
2070 the highpart to be unequal about half of the time; operation should be
2071 an inequality comparison, operands non-constant, and overall probability
2072 about 50%. Likewise for quicksort.
2073 - Range checks will often be made against constants. Even if we assume for
2074 simplicity an even distribution of the non-constant operand over a
2075 sub-range here, the same probability could be generated with differently
2076 wide sub-ranges - as long as the ratio of the part of the subrange that
2077 is before the threshold to the part that comes after the threshold stays
2078 the same. Thus, we can't really tell anything here;
2079 assuming random distribution is at least simple.
2081 bool
2082 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2084 enum rtx_code msw_taken, msw_skip, lsw_taken;
2085 rtx_code_label *skip_label = NULL;
2086 rtx op1h, op1l, op2h, op2l;
2087 int num_branches;
2088 profile_probability prob, rev_prob;
2089 profile_probability msw_taken_prob = profile_probability::uninitialized (),
2090 msw_skip_prob = profile_probability::uninitialized (),
2091 lsw_taken_prob = profile_probability::uninitialized ();
2093 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2094 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2095 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2096 op1l = gen_lowpart (SImode, operands[1]);
2097 op2l = gen_lowpart (SImode, operands[2]);
2098 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2099 prob = split_branch_probability;
2100 rev_prob = prob.invert ();
2101 switch (comparison)
2103 case EQ:
2104 msw_skip = NE;
2105 lsw_taken = EQ;
2106 if (prob.initialized_p ())
2108 /* FIXME: This is not optimal. We do not really know the probability
2109 that values differ by MSW only, but we should probably distribute
2110 probabilities more evenly. */
2111 msw_skip_prob = rev_prob;
2112 lsw_taken_prob = prob > profile_probability::never ()
2113 ? profile_probability::guessed_always ()
2114 : profile_probability::guessed_never ();
2116 break;
2117 case NE:
2118 msw_taken = NE;
2119 msw_taken_prob = prob;
2120 lsw_taken = NE;
2121 lsw_taken_prob = profile_probability::guessed_never ();
2122 break;
2123 case GTU: case GT:
2124 msw_taken = comparison;
2125 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2126 break;
2127 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2128 msw_skip = swap_condition (msw_taken);
2129 lsw_taken = GTU;
2130 break;
2131 case GEU: case GE:
2132 if (op2l == CONST0_RTX (SImode))
2133 msw_taken = comparison;
2134 else
2136 msw_taken = comparison == GE ? GT : GTU;
2137 msw_skip = swap_condition (msw_taken);
2138 lsw_taken = GEU;
2140 break;
2141 case LTU: case LT:
2142 msw_taken = comparison;
2143 if (op2l == CONST0_RTX (SImode))
2144 break;
2145 msw_skip = swap_condition (msw_taken);
2146 lsw_taken = LTU;
2147 break;
2148 case LEU: case LE:
2149 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2150 msw_taken = comparison;
2151 else
2153 lsw_taken = LEU;
2154 if (comparison == LE)
2155 msw_taken = LT;
2156 else if (op2h != CONST0_RTX (SImode))
2157 msw_taken = LTU;
2158 else
2160 msw_skip = swap_condition (LTU);
2161 break;
2163 msw_skip = swap_condition (msw_taken);
2165 break;
2166 default: return false;
2168 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2169 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2170 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2171 if (comparison != EQ && comparison != NE && num_branches > 1)
2173 if (!CONSTANT_P (operands[2])
2174 && prob.initialized_p ()
2175 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2176 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2178 msw_taken_prob = prob.apply_scale (1, 2);
2179 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
2180 rev_prob.to_reg_br_prob_base ()
2181 + REG_BR_PROB_BASE);
2182 lsw_taken_prob = prob;
2184 else
2186 msw_taken_prob = prob;
2187 msw_skip_prob = profile_probability::guessed_always ();
2188 /* ??? If we have a constant op2h, should we use that when
2189 calculating lsw_taken_prob? */
2190 lsw_taken_prob = prob;
2193 operands[1] = op1h;
2194 operands[2] = op2h;
2196 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2197 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2198 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2200 rtx taken_label = operands[3];
2202 /* Operands were possibly modified, but msw_skip doesn't expect this.
2203 Always use the original ones. */
2204 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2206 operands[1] = op1h;
2207 operands[2] = op2h;
2210 operands[3] = skip_label = gen_label_rtx ();
2211 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2212 operands[3] = taken_label;
2214 operands[1] = op1l;
2215 operands[2] = op2l;
2216 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2217 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2218 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2219 emit_label (skip_label);
2220 return true;
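/* Worked example (added for illustration): for an unsigned DImode branch
   "if (a < b) goto L", with a non-zero low word in operand b, the code
   above picks msw_taken = LTU, msw_skip = GTU and lsw_taken = LTU, so the
   expansion is roughly

     branch to L     if high(a) <u high(b)     (msw_taken)
     branch to skip  if high(a) >u high(b)     (msw_skip, local label)
     branch to L     if low(a)  <u low(b)      (lsw_taken)
   skip:

   i.e. the low words only decide the outcome when the high words are
   equal.  */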
2223 /* Given an operand, return 1 if the evaluated operand plugged into an
2224 if_then_else will result in a branch_true, 0 if branch_false, or
2225 -1 if neither applies. The truth table goes like this:
2227 op | cmpval | code | result
2228 ---------+--------+---------+--------------------
2229 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2230 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2231 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2232 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2233 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2234 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2235 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2236 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2237 int
2238 sh_eval_treg_value (rtx op)
2240 if (t_reg_operand (op, GET_MODE (op)))
2241 return 1;
2242 if (negt_reg_operand (op, GET_MODE (op)))
2243 return 0;
2245 rtx_code code = GET_CODE (op);
2246 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2247 return -1;
2249 int cmpop = code == EQ ? 1 : 0;
2250 int cmpval = INTVAL (XEXP (op, 1));
2251 if (cmpval != 0 && cmpval != 1)
2252 return -1;
2254 int t;
2255 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2256 t = 0;
2257 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2258 t = 1;
2259 else
2260 return -1;
2262 return t ^ (cmpval == cmpop);
2265 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2266 of floating-point comparisons. */
2267 static void
2268 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2270 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2271 && GET_CODE (insn) != PARALLEL)
2273 insn = gen_rtx_PARALLEL (VOIDmode,
2274 gen_rtvec (3, insn,
2275 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2276 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2278 emit_insn (insn);
2281 /* Prepare the operands for an scc instruction; make sure that the
2282 compare has been done and the result is in T_REG. */
2283 void
2284 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2286 rtx t_reg = get_t_reg_rtx ();
2287 enum rtx_code oldcode = code;
2289 /* First need a compare insn. */
2290 switch (code)
2292 case NE:
2293 /* It isn't possible to handle this case. */
2294 gcc_unreachable ();
2295 case LT:
2296 code = GT;
2297 break;
2298 case LE:
2299 code = GE;
2300 break;
2301 case LTU:
2302 code = GTU;
2303 break;
2304 case LEU:
2305 code = GEU;
2306 break;
2307 default:
2308 break;
2310 if (code != oldcode)
2311 std::swap (op0, op1);
2313 machine_mode mode = GET_MODE (op0);
2314 if (mode == VOIDmode)
2315 mode = GET_MODE (op1);
2317 op0 = force_reg (mode, op0);
2318 if ((code != EQ && code != NE
2319 && (op1 != const0_rtx
2320 || code == GTU || code == GEU || code == LTU || code == LEU))
2321 || (mode == DImode && op1 != const0_rtx)
2322 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2323 op1 = force_reg (mode, op1);
2325 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2326 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2327 mode);
2330 /* Called from the md file, set up the operands of a compare instruction. */
2331 void
2332 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2334 enum rtx_code code = GET_CODE (operands[0]);
2335 enum rtx_code branch_code;
2336 rtx op0 = operands[1];
2337 rtx op1 = operands[2];
2338 rtx insn;
2339 bool need_ccmpeq = false;
2341 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2343 op0 = force_reg (mode, op0);
2344 op1 = force_reg (mode, op1);
2346 else
2348 if (code != EQ || mode == DImode)
2350 /* Force args into regs, since we can't use constants here. */
2351 op0 = force_reg (mode, op0);
2352 if (op1 != const0_rtx || code == GTU || code == GEU)
2353 op1 = force_reg (mode, op1);
2357 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2359 if (code == LT
2360 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2361 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2363 std::swap (op0, op1);
2364 code = swap_condition (code);
2367 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2368 if (code == GE)
2370 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2371 need_ccmpeq = true;
2372 code = GT;
2375 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2376 to EQ/GT respectively. */
2377 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2380 switch (code)
2382 case EQ:
2383 case GT:
2384 case GE:
2385 case GTU:
2386 case GEU:
2387 branch_code = code;
2388 break;
2389 case NE:
2390 case LT:
2391 case LE:
2392 case LTU:
2393 case LEU:
2394 branch_code = reverse_condition (code);
2395 break;
2396 default:
2397 gcc_unreachable ();
2400 insn = gen_rtx_SET (get_t_reg_rtx (),
2401 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2403 sh_emit_set_t_insn (insn, mode);
2404 if (need_ccmpeq)
2405 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2407 if (branch_code == code)
2408 emit_jump_insn (gen_branch_true (operands[3]));
2409 else
2410 emit_jump_insn (gen_branch_false (operands[3]));
2413 void
2414 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2416 enum rtx_code code = GET_CODE (operands[1]);
2417 rtx op0 = operands[2];
2418 rtx op1 = operands[3];
2419 rtx_code_label *lab = NULL;
2420 bool invert = false;
2422 op0 = force_reg (mode, op0);
2423 if ((code != EQ && code != NE
2424 && (op1 != const0_rtx
2425 || code == GTU || code == GEU || code == LTU || code == LEU))
2426 || (mode == DImode && op1 != const0_rtx)
2427 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2428 op1 = force_reg (mode, op1);
2430 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2432 if (code == LT || code == LE)
2434 std::swap (op0, op1);
2435 code = swap_condition (code);
2437 if (code == GE)
2439 if (TARGET_IEEE)
2441 lab = gen_label_rtx ();
2442 sh_emit_scc_to_t (EQ, op0, op1);
2443 emit_jump_insn (gen_branch_true (lab));
2444 code = GT;
2446 else
2448 code = LT;
2449 invert = true;
2454 if (code == NE)
2456 code = EQ;
2457 invert = true;
2460 sh_emit_scc_to_t (code, op0, op1);
2461 if (lab)
2462 emit_label (lab);
2463 if (invert)
2464 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2465 else
2466 emit_move_insn (operands[0], get_t_reg_rtx ());
2469 /* Functions to output assembly code. */
2471 /* Return a sequence of instructions to perform DI or DF move.
2473 Since the SH cannot move a DI or DF in one instruction, we have
2474 to take care when we see overlapping source and dest registers. */
2475 const char *
2476 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2477 machine_mode mode)
2479 rtx dst = operands[0];
2480 rtx src = operands[1];
2482 if (MEM_P (dst)
2483 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2484 return "mov.l %T1,%0" "\n"
2485 " mov.l %1,%0";
2487 if (register_operand (dst, mode)
2488 && register_operand (src, mode))
2490 if (REGNO (src) == MACH_REG)
2491 return "sts mach,%S0" "\n"
2492 " sts macl,%R0";
2494 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2495 when mov.d r1,r0 do r1->r0 then r2->r1. */
2496 if (REGNO (src) + 1 == REGNO (dst))
2497 return "mov %T1,%T0" "\n"
2498 " mov %1,%0";
2499 else
2500 return "mov %1,%0" "\n"
2501 " mov %T1,%T0";
2503 else if (CONST_INT_P (src))
2505 if (INTVAL (src) < 0)
2506 output_asm_insn ("mov #-1,%S0", operands);
2507 else
2508 output_asm_insn ("mov #0,%S0", operands);
2510 return "mov %1,%R0";
2512 else if (MEM_P (src))
2514 int ptrreg = -1;
2515 int dreg = REGNO (dst);
2516 rtx inside = XEXP (src, 0);
2518 switch (GET_CODE (inside))
2520 case REG:
2521 ptrreg = REGNO (inside);
2522 break;
2524 case SUBREG:
2525 ptrreg = subreg_regno (inside);
2526 break;
2528 case PLUS:
2529 ptrreg = REGNO (XEXP (inside, 0));
2530 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2531 an offsettable address. Unfortunately, offsettable addresses use
2532 QImode to check the offset, and a QImode offsettable address
2533 requires r0 for the other operand, which is not currently
2534 supported, so we can't use the 'o' constraint.
2535 Thus we must check for and handle r0+REG addresses here.
2536 We punt for now, since this is likely very rare. */
2537 gcc_assert (!REG_P (XEXP (inside, 1)));
2538 break;
2540 case LABEL_REF:
2541 return "mov.l %1,%0" "\n"
2542 " mov.l %1+4,%T0";
2543 case POST_INC:
2544 return "mov.l %1,%0" "\n"
2545 " mov.l %1,%T0";
2546 default:
2547 gcc_unreachable ();
2550 /* Work out the safe way to copy. Copy into the second half first. */
2551 if (dreg == ptrreg)
2552 return "mov.l %T1,%T0" "\n"
2553 " mov.l %1,%0";
2556 return "mov.l %1,%0" "\n"
2557 " mov.l %T1,%T0";
2560 /* Print an instruction which would have gone into a delay slot after
2561 another instruction, but couldn't because the other instruction expanded
2562 into a sequence where putting the slot insn at the end wouldn't work. */
2563 static void
2564 print_slot (rtx_sequence *seq)
2566 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2568 seq->insn (1)->set_deleted ();
2571 const char *
2572 output_far_jump (rtx_insn *insn, rtx op)
2574 struct { rtx lab, reg, op; } this_jmp;
2575 rtx_code_label *braf_base_lab = NULL;
2576 const char *jump;
2577 int far;
2578 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2579 rtx_insn *prev;
2581 this_jmp.lab = gen_label_rtx ();
2583 if (TARGET_SH2
2584 && offset >= -32764
2585 && offset - get_attr_length (insn) <= 32766
2586 && ! CROSSING_JUMP_P (insn))
2588 far = 0;
2589 jump = "mov.w %O0,%1" "\n"
2590 " braf %1";
2592 else
2594 far = 1;
2595 if (flag_pic)
2597 if (TARGET_SH2)
2598 jump = "mov.l %O0,%1" "\n"
2599 " braf %1";
2600 else
2601 jump = "mov.l r0,@-r15" "\n"
2602 " mova %O0,r0" "\n"
2603 " mov.l @r0,%1" "\n"
2604 " add r0,%1" "\n"
2605 " mov.l @r15+,r0" "\n"
2606 " jmp @%1";
2608 else
2609 jump = "mov.l %O0,%1" "\n"
2610 " jmp @%1";
2612 /* If we have a scratch register available, use it. */
2613 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2614 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2616 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2617 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2618 jump = "mov.l r1,@-r15" "\n"
2619 " mova %O0,r0" "\n"
2620 " mov.l @r0,r1" "\n"
2621 " add r1,r0" "\n"
2622 " mov.l @r15+,r1" "\n"
2623 " jmp @%1";
2624 output_asm_insn (jump, &this_jmp.lab);
2625 if (dbr_sequence_length ())
2626 print_slot (final_sequence);
2627 else
2628 output_asm_insn ("nop", 0);
2630 else
2632 /* Output the delay slot insn first if any. */
2633 if (dbr_sequence_length ())
2634 print_slot (final_sequence);
2636 this_jmp.reg = gen_rtx_REG (SImode, 13);
2637 output_asm_insn ("mov.l r13,@-r15", 0);
2638 output_asm_insn (jump, &this_jmp.lab);
2639 output_asm_insn ("mov.l @r15+,r13", 0);
2641 if (far && flag_pic && TARGET_SH2)
2643 braf_base_lab = gen_label_rtx ();
2644 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2645 CODE_LABEL_NUMBER (braf_base_lab));
2647 if (far)
2648 output_asm_insn (".align 2", 0);
2649 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2650 this_jmp.op = op;
2651 if (far && flag_pic)
2653 if (TARGET_SH2)
2654 this_jmp.lab = braf_base_lab;
2655 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2657 else
2658 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2659 return "";
2662 /* Local label counter, used for constants in the pool and inside
2663 pattern branches. */
2664 static int lf = 100;
2666 /* Output code for ordinary branches. */
2667 const char *
2668 output_branch (int logic, rtx_insn *insn, rtx *operands)
2670 switch (get_attr_length (insn))
2672 case 6:
2673 /* This can happen if filling the delay slot has caused a forward
2674 branch to exceed its range (we could reverse it, but only
2675 when we know we won't overextend other branches; this should
2676 best be handled by relaxation).
2677 It can also happen when other condbranches hoist delay slot insn
2678 from their destination, thus leading to code size increase.
2679 But the branch will still be in the range -4092..+4098 bytes. */
2680 if (! TARGET_RELAX)
2682 int label = lf++;
2683 /* The call to print_slot will clobber the operands. */
2684 rtx op0 = operands[0];
2686 /* If the instruction in the delay slot is annulled (true), then
2687 there is no delay slot where we can put it now. The only safe
2688 place for it is after the label. final will do that by default. */
2690 if (final_sequence
2691 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2692 && get_attr_length (final_sequence->insn (1)))
2694 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2695 ASSEMBLER_DIALECT ? "/" : ".", label);
2696 print_slot (final_sequence);
2698 else
2699 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2701 output_asm_insn ("bra\t%l0", &op0);
2702 fprintf (asm_out_file, "\tnop\n");
2703 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2705 return "";
2707 /* FALLTHRU */
2708 /* When relaxing, handle this like a short branch. The linker
2709 will fix it up if it still doesn't fit after relaxation. */
2710 case 2:
2711 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2713 /* These are for SH2e, in which we have to account for the
2714 extra nop because of the hardware bug in annulled branches. */
2715 case 8:
2716 if (! TARGET_RELAX)
2718 int label = lf++;
2720 gcc_assert (!final_sequence
2721 || !(INSN_ANNULLED_BRANCH_P
2722 (XVECEXP (final_sequence, 0, 0))));
2723 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2724 logic ? "f" : "t",
2725 ASSEMBLER_DIALECT ? "/" : ".", label);
2726 fprintf (asm_out_file, "\tnop\n");
2727 output_asm_insn ("bra\t%l0", operands);
2728 fprintf (asm_out_file, "\tnop\n");
2729 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2731 return "";
2733 /* FALLTHRU */
2734 case 4:
2736 char buffer[10];
2738 sprintf (buffer, "b%s%ss\t%%l0",
2739 logic ? "t" : "f",
2740 ASSEMBLER_DIALECT ? "/" : ".");
2741 output_asm_insn (buffer, &operands[0]);
2742 return "nop";
2745 default:
2746 /* There should be no branches longer than this now - that would
2747 indicate that something has destroyed the branches set
2748 up in machine_dependent_reorg. */
2749 gcc_unreachable ();
2753 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2754 fill in operands[9] as a label to the successor insn.
2755 We try to use jump threading where possible.
2756 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2757 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2758 follow jmp and bt, if the address is in range. */
2759 const char *
2760 output_branchy_insn (enum rtx_code code, const char *templ,
2761 rtx_insn *insn, rtx *operands)
2763 rtx_insn *next_insn = NEXT_INSN (insn);
2765 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2767 rtx src = SET_SRC (PATTERN (next_insn));
2768 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2770 /* Following branch not taken */
2771 rtx_code_label *lab = gen_label_rtx ();
2772 emit_label_after (lab, next_insn);
2773 INSN_ADDRESSES_NEW (lab,
2774 INSN_ADDRESSES (INSN_UID (next_insn))
2775 + get_attr_length (next_insn));
2776 operands[9] = lab;
2777 return templ;
2779 else
2781 int offset = (branch_dest (next_insn)
2782 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2783 if (offset >= -252 && offset <= 258)
2785 if (GET_CODE (src) == IF_THEN_ELSE)
2786 /* branch_true */
2787 src = XEXP (src, 1);
2788 operands[9] = src;
2789 return templ;
2793 rtx_code_label *lab = gen_label_rtx ();
2794 emit_label_after (lab, insn);
2795 INSN_ADDRESSES_NEW (lab,
2796 INSN_ADDRESSES (INSN_UID (insn))
2797 + get_attr_length (insn));
2798 operands[9] = lab;
2799 return templ;
2802 const char *
2803 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2805 return output_branchy_insn (NE, "bt %l9" "\n"
2806 " fcmp/eq %1,%0",
2807 insn, operands);
2810 /* Output the start of the assembler file. */
2811 static void
2812 sh_file_start (void)
2814 default_file_start ();
2816 if (TARGET_ELF)
2817 /* We need to show the text section with the proper
2818 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2819 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2820 will complain. We can teach GAS specifically about the
2821 default attributes for our choice of text section, but
2822 then we would have to change GAS again if/when we change
2823 the text section name. */
2824 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2825 else
2826 /* Switch to the data section so that the coffsem symbol
2827 isn't in the text section. */
2828 switch_to_section (data_section);
2830 if (TARGET_LITTLE_ENDIAN)
2831 fputs ("\t.little\n", asm_out_file);
2834 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2835 need to be output as pointers to function descriptors for
2836 FDPIC. */
2838 static bool
2839 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2841 if (TARGET_FDPIC && size == UNITS_PER_WORD
2842 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2844 fputs ("\t.long\t", asm_out_file);
2845 output_addr_const (asm_out_file, value);
2846 fputs ("@FUNCDESC\n", asm_out_file);
2847 return true;
2849 return default_assemble_integer (value, size, aligned_p);
2852 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2853 static bool
2854 unspec_caller_rtx_p (rtx pat)
2856 rtx base, offset;
2857 split_const (pat, &base, &offset);
2859 if (GET_CODE (base) == UNSPEC)
2861 if (XINT (base, 1) == UNSPEC_CALLER)
2862 return true;
2863 for (int i = 0; i < XVECLEN (base, 0); i++)
2864 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2865 return true;
2867 return false;
2870 /* Indicate that INSN cannot be duplicated. This is true for an insn
2871 that generates a unique label. */
2872 static bool
2873 sh_cannot_copy_insn_p (rtx_insn *insn)
2875 if (!reload_completed || !flag_pic)
2876 return false;
2878 if (!NONJUMP_INSN_P (insn))
2879 return false;
2880 if (asm_noperands (insn) >= 0)
2881 return false;
2883 rtx pat = PATTERN (insn);
2885 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2886 return false;
2888 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2890 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2891 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2892 return true;
2895 if (GET_CODE (pat) != SET)
2896 return false;
2897 pat = SET_SRC (pat);
2899 if (unspec_caller_rtx_p (pat))
2900 return true;
2902 return false;
2905 /* Number of instructions used to make an arithmetic right shift by N. */
2906 static const char ashiftrt_insns[] =
2907 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2909 /* Description of a logical left or right shift, when expanded to a sequence
2910 of 1/2/8/16 shifts.
2911 Notice that one bit right shifts clobber the T bit. One bit left shifts
2912 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2913 enum
2915 ASHL_CLOBBERS_T = 1 << 0,
2916 LSHR_CLOBBERS_T = 1 << 1
2919 struct ashl_lshr_sequence
2921 char insn_count;
2922 signed char amount[6];
2923 char clobbers_t;
2926 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2928 { 0, { 0 }, 0 }, // 0
2929 { 1, { 1 }, LSHR_CLOBBERS_T },
2930 { 1, { 2 }, 0 },
2931 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2932 { 2, { 2, 2 }, 0 }, // 4
2933 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2934 { 3, { 2, 2, 2 }, 0 },
2935 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2936 { 1, { 8 }, 0 }, // 8
2937 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2938 { 2, { 8, 2 }, 0 },
2939 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2940 { 3, { 8, 2, 2 }, 0 }, // 12
2941 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2942 { 3, { 8, -2, 8 }, 0 },
2943 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2944 { 1, { 16 }, 0 }, // 16
2945 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2946 { 2, { 16, 2 }, 0 },
2947 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2948 { 3, { 16, 2, 2 }, 0 }, // 20
2949 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2950 { 3, { 16, -2, 8 }, 0 },
2951 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2952 { 2, { 16, 8 }, 0 }, // 24
2953 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2954 { 3, { 16, 8, 2 }, 0 },
2955 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2956 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2957 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2958 { 3, { 16, -2, 16 }, 0 },
2960 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2961 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2962 However, the shift-and combiner code needs this entry here to be in
2963 terms of real shift insns. */
2964 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2967 /* Individual shift amounts for shift amounts < 16, where up to the three
2968 highmost bits might be clobbered. This is typically used when combined
2969 with some kind of sign or zero extension. */
2970 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2972 { 0, { 0 }, 0 }, // 0
2973 { 1, { 1 }, LSHR_CLOBBERS_T },
2974 { 1, { 2 }, 0 },
2975 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2976 { 2, { 2, 2 }, 0 }, // 4
2977 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2978 { 2, { 8, -2 }, 0 },
2979 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2980 { 1, { 8 }, 0 }, // 8
2981 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2982 { 2, { 8, 2 }, 0 },
2983 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2984 { 3, { 8, 2, 2 }, 0 }, // 12
2985 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2986 { 2, { 16, -2 }, 0 },
2987 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2988 { 1, { 16 }, 0 }, // 16
2989 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2990 { 2, { 16, 2 }, 0 },
2991 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2992 { 3, { 16, 2, 2 }, 0 }, // 20
2993 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2994 { 3, { 16, -2, 8 }, 0 },
2995 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2996 { 2, { 16, 8 }, 0 }, // 24
2997 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2998 { 3, { 16, 8, 2 }, 0 },
2999 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
3000 { 4, { 16, 8, 2, 2 }, 0 }, // 28
3001 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3002 { 3, { 16, -2, 16 }, 0 },
3003 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
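/* Example of how the tables above are read (added for clarity):
   ashl_lshr_seq[14] = { 3, { 8, -2, 8 }, 0 } expands a left shift by 14 as
   shll8, shlr2, shll8; a negative amount means a shift in the opposite
   direction (see gen_ashift below), and the intermediate right shift only
   discards bits that a shift by 14 would lose anyway.  The ext variant
   ext_ashl_lshr_seq[14] = { 2, { 16, -2 }, 0 } is shorter (shll16, shlr2)
   but clobbers the two highmost result bits, which is acceptable when the
   shift is combined with a sign or zero extension as described above.  */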
3006 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3007 will clobber the T bit. */
3008 bool
3009 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3011 gcc_assert (CONST_INT_P (shift_amount));
3013 const int shift_amount_i = INTVAL (shift_amount) & 31;
3015 /* Special case for shift count of 31: use and-rotl sequence. */
3016 if (shift_amount_i == 31)
3017 return true;
3019 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3020 & ASHL_CLOBBERS_T) != 0;
3023 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3024 instructions will clobber the T bit. */
3025 bool
3026 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3028 gcc_assert (CONST_INT_P (shift_amount));
3030 /* For right shifts the constant might be negative. */
3031 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3033 /* Special case for shift count of 31: use shll-movt sequence. */
3034 if (shift_amount_i == 31)
3035 return true;
3037 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3038 & LSHR_CLOBBERS_T) != 0;
3041 /* Return true if it is potentially beneficial to use a dynamic shift
3042 instruction (shad / shld) instead of a combination of 1/2/8/16
3043 shift instructions for the specified shift count.
3044 If dynamic shifts are not available, always return false. */
3045 bool
3046 sh_dynamicalize_shift_p (rtx count)
3048 gcc_assert (CONST_INT_P (count));
3050 /* For right shifts the constant might be negative. */
3051 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3052 int insn_count;
3054 /* For left and right shifts, there are shorter 2 insn sequences for
3055 shift amounts of 31. */
3056 if (shift_amount_i == 31)
3057 insn_count = 2;
3058 else
3059 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3061 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
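/* Illustrative example (added; the actual SH_DYNAMIC_SHIFT_COST value is
   target-defined): a constant left shift by 5 needs the 3 insn static
   sequence { 2, 1, 2 }, so on a TARGET_DYNSHIFT cpu it is usually cheaper
   to load the count and use a dynamic shift instead:

     mov   #5,r1
     shad  r1,r0       ! r0 is shifted left by r1

   hence the comparison of insn_count against 1 + SH_DYNAMIC_SHIFT_COST
   above, the "1" accounting for the constant load.  */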
3064 /* Assuming we have a value that has been sign-extended by at least one bit,
3065 can we use ext_ashl_lshr_seq with the last shift turned to an
3066 arithmetic shift to shift it by N without data loss, and quicker than by
3067 other means? */
3068 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3070 /* Return the cost of a shift. */
3071 static inline int
3072 shiftcosts (rtx x)
3074 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3076 if (GET_MODE (x) == DImode
3077 && CONST_INT_P (XEXP (x, 1))
3078 && INTVAL (XEXP (x, 1)) == 1)
3079 return 2;
3081 /* Everything else is invalid, because there is no pattern for it. */
3082 return -1;
3084 /* If shifting by a non-constant amount, this will be expensive. */
3085 if (!CONST_INT_P (XEXP (x, 1)))
3086 return SH_DYNAMIC_SHIFT_COST;
3088 /* Otherwise, return the true cost in instructions. Cope with out of range
3089 shift counts more or less arbitrarily. */
3090 int value = INTVAL (XEXP (x, 1)) & 31;
3092 if (GET_CODE (x) == ASHIFTRT)
3094 int cost = ashiftrt_insns[value];
3095 /* If dynamic shifts are available and profitable in this case, then we
3096 put the constant in a reg and use shad. */
3097 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3098 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3099 return cost;
3101 else
3102 return ashl_lshr_seq[value].insn_count;
3105 /* Return the cost of an AND/XOR/IOR operation. */
3106 static inline int
3107 and_xor_ior_costs (rtx x, int code)
3109 /* On SH1-4 we have only max. SImode operations.
3110 Double the cost for modes > SImode. */
3111 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3113 /* A logical operation with two registers is a single cycle
3114 instruction. */
3115 if (!CONST_INT_P (XEXP (x, 1)))
3116 return 1 * cost_scale;
3118 int i = INTVAL (XEXP (x, 1));
3120 /* These constants are single cycle extu.[bw] instructions. */
3121 if ((i == 0xff || i == 0xffff) && code == AND)
3122 return 1 * cost_scale;
3123 /* Constants that can be used in an instruction as an immediate are
3124 a single cycle, but this requires r0, so make it a little more
3125 expensive. */
3126 if (CONST_OK_FOR_K08 (i))
3127 return 2 * cost_scale;
3128 /* Constants that can be loaded with a mov immediate need one more cycle.
3129 This case is probably unnecessary. */
3130 if (CONST_OK_FOR_I08 (i))
3131 return 2 * cost_scale;
3132 /* Any other constant requires an additional 2 cycle pc-relative load.
3133 This case is probably unnecessary. */
3134 return 3 * cost_scale;
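/* Resulting example costs (added for illustration):
     (and:SI r4 (const_int 0xff))     -> 1   single extu.b insn
     (and:SI r4 (const_int 0x3f))     -> 2   and #imm,R0; ties up R0
     (and:SI r4 (const_int 0x12345))  -> 3   constant comes from the pool
   each doubled for modes wider than SImode.  */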
3137 /* Return the cost of an addition or a subtraction. */
3138 static inline int
3139 addsubcosts (rtx x)
3141 if (GET_MODE (x) == SImode)
3143 /* The addc or subc patterns will eventually become one or two
3144 instructions. Below are some costs for some of the patterns
3145 which combine would reject because the costs of the individual
3146 insns in the patterns are lower.
3148 FIXME: It would be much easier if we had something like insn cost
3149 attributes and the cost calculation machinery used those attributes
3150 in the first place. This would eliminate redundant recog-like C
3151 code to calculate costs of complex patterns. */
3152 rtx op0 = XEXP (x, 0);
3153 rtx op1 = XEXP (x, 1);
3155 if (GET_CODE (x) == PLUS)
3157 if (GET_CODE (op0) == AND
3158 && XEXP (op0, 1) == const1_rtx
3159 && (GET_CODE (op1) == PLUS
3160 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3161 return 1;
3163 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3164 && GET_CODE (op1) == LSHIFTRT
3165 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3166 return 1;
3168 /* Let's assume that adding the result of an insn that stores into
3169 the T bit is cheap. */
3170 if (treg_set_expr (op1, SImode))
3171 return 1;
3172 if (treg_set_expr (op0, SImode))
3173 return 1;
3176 /* On SH1-4 we have only max. SImode operations.
3177 Double the cost for modes > SImode. */
3178 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3180 /* Adding a register is a single cycle insn. */
3181 if (REG_P (XEXP (x, 1))
3182 || GET_CODE (XEXP (x, 1)) == SUBREG)
3183 return 1 * cost_scale;
3185 /* Likewise for small constants. */
3186 if (CONST_INT_P (XEXP (x, 1))
3187 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3188 return 1 * cost_scale;
3190 /* Any other constant requires a 2 cycle pc-relative load plus an
3191 addition. */
3192 return 3 * cost_scale;
3195 /* Return the cost of a multiply. */
3196 static inline int
3197 multcosts (rtx x ATTRIBUTE_UNUSED)
3199 if (sh_multcost >= 0)
3200 return sh_multcost;
3202 if (TARGET_SH2)
3204 /* We have a mul insn, so we can never take more than the mul and the
3205 read of the mac reg, but count more because of the latency and extra
3206 reg usage. */
3207 if (optimize_size)
3208 return 2;
3209 return 3;
3212 /* If we're aiming at small code, then just count the number of
3213 insns in a multiply call sequence. */
3214 if (optimize_size)
3215 return 5;
3217 /* Otherwise count all the insns in the routine we'd be calling too. */
3218 return 20;
3221 /* Compute a (partial) cost for rtx X. Return true if the complete
3222 cost has been computed, and false if subexpressions should be
3223 scanned. In either case, *TOTAL contains the cost result. */
3224 static bool
3225 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3226 int opno ATTRIBUTE_UNUSED,
3227 int *total, bool speed ATTRIBUTE_UNUSED)
3229 int code = GET_CODE (x);
3231 switch (code)
3233 /* The lower-subreg pass decides whether to split multi-word regs
3234 into individual regs by looking at the cost for a SET of certain
3235 modes with the following patterns:
3236 (set (reg) (reg))
3237 (set (reg) (const_int 0))
3238 On machines that support vector-move operations a multi-word move
3239 is the same cost as individual reg move. On SH there is no
3240 vector-move, so we have to provide the correct cost in the number
3241 of move insns to load/store the reg of the mode in question. */
3242 case SET:
3243 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3245 *total = COSTS_N_INSNS (1);
3246 return true;
3249 if (register_operand (SET_DEST (x), VOIDmode)
3250 && (register_operand (SET_SRC (x), VOIDmode)
3251 || satisfies_constraint_Z (SET_SRC (x))))
3253 const machine_mode mode = GET_MODE (SET_DEST (x));
3254 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3255 / mov_insn_size (mode, TARGET_SH2A));
3256 return true;
3258 return false;
3260 /* The cost of a mem access is mainly the cost of the address mode. */
3261 case MEM:
3262 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3263 true);
3264 return true;
3266 case IF_THEN_ELSE:
3267 /* This case is required for the if_then_else negc pattern. */
3268 if (treg_set_expr (XEXP (x, 0), SImode))
3270 *total = COSTS_N_INSNS (1);
3271 return true;
3273 else
3274 return false;
3276 /* Zero extracts of single bits are usually combine patterns for the
3277 tst insns. */
3278 case ZERO_EXTRACT:
3279 if (GET_CODE (XEXP (x, 0)) == XOR
3280 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3281 && XEXP (x, 1) == const1_rtx
3282 && CONST_INT_P (XEXP (x, 2))
3283 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3284 /* Check that the xor constant overlaps with the extracted bit. */
3285 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3287 *total = 1; //COSTS_N_INSNS (1);
3288 return true;
3291 /* div0s variant. */
3292 if (GET_CODE (XEXP (x, 0)) == XOR
3293 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3294 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3296 *total = 1;
3297 return true;
3299 return false;
3301 /* The cost of a sign or zero extend depends on whether the source is a
3302 reg or a mem. In case of a mem take the address into account. */
3303 case SIGN_EXTEND:
3304 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3306 *total = COSTS_N_INSNS (1);
3307 return true;
3309 if (MEM_P (XEXP (x, 0)))
3311 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3312 GET_MODE (XEXP (x, 0)),
3313 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3314 return true;
3316 return false;
3318 case ZERO_EXTEND:
3319 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3321 *total = COSTS_N_INSNS (1);
3322 return true;
3324 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3325 && (GET_MODE (XEXP (x, 0)) == QImode
3326 || GET_MODE (XEXP (x, 0)) == HImode))
3328 /* Handle SH2A's movu.b and movu.w insn. */
3329 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3330 GET_MODE (XEXP (x, 0)),
3331 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3332 return true;
3334 return false;
3336 /* mems for SFmode and DFmode can be inside a parallel due to
3337 the way the fpscr is handled. */
3338 case PARALLEL:
3339 for (int i = 0; i < XVECLEN (x, 0); i++)
3341 rtx xx = XVECEXP (x, 0, i);
3342 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3344 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3345 GET_MODE (XEXP (xx, 0)),
3346 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3347 return true;
3349 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3351 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3352 GET_MODE (XEXP (xx, 1)),
3353 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3354 return true;
3358 if (sh_1el_vec (x, VOIDmode))
3359 *total = outer_code != SET;
3360 else if (sh_rep_vec (x, VOIDmode))
3361 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3362 + (outer_code != SET));
3363 else
3364 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3365 return true;
3367 case CONST_INT:
3368 if (CONST_OK_FOR_I08 (INTVAL (x)))
3369 *total = 0;
3370 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3371 && CONST_OK_FOR_K08 (INTVAL (x)))
3372 *total = 1;
3373 /* prepare_cmp_insn will force costly constants into registers before
3374 the cbranch[sd]i4 patterns can see them, so preserve potentially
3375 interesting ones not covered by I08 above. */
3376 else if (outer_code == COMPARE
3377 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3378 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3379 || INTVAL (x) == 0x7fffffff
3380 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3381 *total = 1;
3382 else
3383 *total = 8;
3384 return true;
3386 case EQ:
3387 /* An and with a constant compared against zero is
3388 most likely going to be a TST #imm, R0 instruction. */
3389 if (XEXP (x, 1) == const0_rtx
3390 && ((GET_CODE (XEXP (x, 0)) == AND
3391 || (SUBREG_P (XEXP (x, 0))
3392 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3393 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3395 *total = 1;
3396 return true;
3399 else if (XEXP (x, 1) == const0_rtx
3400 && GET_CODE (XEXP (x, 0)) == AND
3401 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3402 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3403 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3404 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3406 *total = 1;
3407 return true;
3409 else
3410 return false;
3412 case SMIN:
3413 case SMAX:
3414 /* This is most likely a clips.b or clips.w insn that is being made up
3415 by combine. */
3416 if (TARGET_SH2A
3417 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3418 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3419 && REG_P (XEXP (XEXP (x, 0), 0))
3420 && CONST_INT_P (XEXP (x, 1)))
3422 *total = COSTS_N_INSNS (1);
3423 return true;
3425 else
3426 return false;
3428 case CONST:
3429 case LABEL_REF:
3430 case SYMBOL_REF:
3431 *total = 5;
3432 return true;
3434 case CONST_DOUBLE:
3435 /* prepare_cmp_insn will force costly constants into registers before
3436 the cbranchdi4 pattern can see them, so preserve potentially
3437 interesting ones. */
3438 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3439 *total = 1;
3440 else
3441 *total = 10;
3442 return true;
3444 case CONST_VECTOR:
3445 /* FIXME: This looks broken. Only the last statement has any effect.
3446 Probably this could be folded with the PARALLEL case? */
3447 if (x == CONST0_RTX (GET_MODE (x)))
3448 *total = 0;
3449 else if (sh_1el_vec (x, VOIDmode))
3450 *total = outer_code != SET;
3451 if (sh_rep_vec (x, VOIDmode))
3452 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3453 + (outer_code != SET));
3454 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3455 return true;
3457 case PLUS:
3458 case MINUS:
3459 *total = COSTS_N_INSNS (addsubcosts (x));
3460 return true;
3462 case AND:
3463 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3464 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3466 *total = COSTS_N_INSNS (1);
3467 return true;
3469 /* Fall through. */
3471 case XOR:
3472 case IOR:
3473 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3474 return true;
3476 case MULT:
3477 *total = COSTS_N_INSNS (multcosts (x));
3478 return true;
3480 case LT:
3481 case GE:
3482 /* div0s sign comparison. */
3483 if (GET_CODE (XEXP (x, 0)) == XOR
3484 && REG_P ((XEXP (XEXP (x, 0), 0)))
3485 && REG_P ((XEXP (XEXP (x, 0), 1)))
3486 && satisfies_constraint_Z (XEXP (x, 1)))
3488 *total = COSTS_N_INSNS (1);
3489 return true;
3491 else
3492 return false;
3494 case LSHIFTRT:
3495 /* div0s sign comparison. */
3496 if (GET_CODE (XEXP (x, 0)) == XOR
3497 && REG_P ((XEXP (XEXP (x, 0), 0)))
3498 && REG_P ((XEXP (XEXP (x, 0), 1)))
3499 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3501 *total = COSTS_N_INSNS (1);
3502 return true;
3504 /* FALLTHRU */
3505 case ASHIFT:
3506 case ASHIFTRT:
3508 int cost = shiftcosts (x);
3509 if (cost < 0)
3510 return false;
3511 *total = COSTS_N_INSNS (cost);
3512 return true;
3515 case DIV:
3516 case UDIV:
3517 case MOD:
3518 case UMOD:
3519 *total = COSTS_N_INSNS (20);
3520 return true;
3522 case FLOAT:
3523 case FIX:
3524 *total = 100;
3525 return true;
3527 default:
3528 return false;
3532 /* Determine the size of the fundamental move insn that will be used
3533 for the specified mode. */
3534 static inline int
3535 mov_insn_size (machine_mode mode, bool consider_sh2a)
3537 const int mode_sz = GET_MODE_SIZE (mode);
3539 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3540 || (TARGET_FMOVD && mode == DFmode))
3541 return mode_sz;
3542 else
3544 /* The max. available mode for actual move insns is SImode.
3545 Larger accesses will be split into multiple loads/stores. */
3546 const int max_mov_sz = GET_MODE_SIZE (SImode);
3547 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3551 /* Determine the maximum possible displacement for a move insn for the
3552 specified mode. */
3553 int
3554 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3556 /* The 4 byte displacement move insns are the same as the 2 byte
3557 versions but take a 12 bit displacement. All we need to do is to
3558 scale the max. displacement value accordingly. */
3559 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3561 /* SH2A supports FPU move insns with 12 bit displacements.
3562 Other variants do not support any kind of displacements for
3563 FPU move insns. */
3564 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3565 return 0;
3566 else
3568 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3569 const int mode_sz = GET_MODE_SIZE (mode);
3570 int r = 15 * mov_insn_sz * disp_scale;
3572 /* If the mov insn will be split into multiple loads/stores, the
3573 maximum possible displacement is a bit smaller. */
3574 if (mode_sz > mov_insn_sz)
3575 r -= mode_sz - mov_insn_sz;
3576 return r;
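/* A rough check of the computation above (illustrative only): for SImode
   without the SH2A extension this yields 15 * 4 = 60, while e.g. QImode with
   CONSIDER_SH2A set gives 15 * 1 * (4095 / 15) = 4095, i.e. the full 12 bit
   displacement field.  */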
3580 /* Determine the alignment mask for a move insn of the
3581 specified mode. */
3582 static inline int
3583 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3585 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3586 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3589 /* Return the displacement value of a displacement address. */
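/* E.g. for a displacement address such as @(8,r4), represented as
   (mem:SI (plus:SI (reg:SI 4) (const_int 8))), the value returned is 8
   (an illustrative reading of the code below).  */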
3590 HOST_WIDE_INT
3591 sh_disp_addr_displacement (rtx x)
3593 gcc_assert (satisfies_constraint_Sdd (x));
3594 return INTVAL (XEXP (XEXP (x, 0), 1));
3597 /* Compute the cost of an address. */
3598 static int
3599 sh_address_cost (rtx x, machine_mode mode,
3600 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3602 /* 'GBR + 0'. Account one more because of R0 restriction. */
3603 if (REG_P (x) && REGNO (x) == GBR_REG)
3604 return 2;
3606 /* Simple reg, post-inc, pre-dec addressing. */
3607 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3608 return 1;
3610 /* 'reg + disp' addressing. */
3611 if (GET_CODE (x) == PLUS
3612 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3614 /* 'GBR + disp'. Account one more because of R0 restriction. */
3615 if (REGNO (XEXP (x, 0)) == GBR_REG
3616 && gbr_displacement (XEXP (x, 1), mode))
3617 return 2;
3619 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3621 if (offset == 0)
3622 return 1;
3624 /* The displacement would fit into a 2 byte move insn.
3625 HImode and QImode loads/stores with displacement put pressure on
3626 R0 which will most likely require another reg copy. Thus account
3627 a higher cost for that. */
3628 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3629 return (mode == HImode || mode == QImode) ? 2 : 1;
3631 /* The displacement would fit into a 4 byte move insn (SH2A). */
3632 if (TARGET_SH2A
3633 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3634 return 2;
3636 /* The displacement is probably out of range and will require extra
3637 calculations. */
3638 return 3;
3641 /* 'reg + reg' addressing. Account a slightly higher cost because of
3642 increased pressure on R0. */
3643 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3644 return 3;
3646 /* Not sure what it is - probably expensive. */
3647 return 10;
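/* Illustrative costs produced by sh_address_cost above (derived from the
   code, not measured): @Rn, @Rn+ and @-Rn -> 1; @(4,Rn) in SImode -> 1;
   @(2,Rn) in HImode or QImode -> 2; an in-range @(disp,GBR) -> 2;
   @(R0,Rn) -> 3; an out-of-range displacement -> 3.  */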
3650 /* Code to expand a shift. */
3651 static void
3652 gen_ashift (int type, int n, rtx reg)
3654 rtx n_rtx;
3656 /* Negative values here come from the shift_amounts array. */
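/* E.g. a negative amount means a shift by the absolute value in the opposite
   direction, so gen_ashift (ASHIFT, -2, reg) below emits a logical right
   shift by 2 (illustrative).  */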
3657 if (n < 0)
3659 if (type == ASHIFT)
3660 type = LSHIFTRT;
3661 else
3662 type = ASHIFT;
3663 n = -n;
3666 n_rtx = GEN_INT (n);
3667 gcc_assert (satisfies_constraint_P27 (n_rtx));
3669 switch (type)
3671 case ASHIFTRT:
3672 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3673 break;
3674 case LSHIFTRT:
3675 if (n == 1)
3676 emit_insn (gen_shlr (reg, reg));
3677 else
3678 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3679 break;
3680 case ASHIFT:
3681 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3682 break;
3683 default:
3684 gcc_unreachable ();
3688 /* Code to expand a HImode shift. */
3689 static void
3690 gen_ashift_hi (int type, int n, rtx reg)
3692 /* Negative values here come from the shift_amounts array. */
3693 if (n < 0)
3695 if (type == ASHIFT)
3696 type = LSHIFTRT;
3697 else
3698 type = ASHIFT;
3699 n = -n;
3702 switch (type)
3704 case ASHIFTRT:
3705 case LSHIFTRT:
3706 /* We don't have HImode right shift operations because using the
3707 ordinary 32 bit shift instructions for that doesn't generate proper
3708 zero/sign extension.
3709 gen_ashift_hi is only called in contexts where we know that the
3710 sign extension works out correctly. */
3712 int offset = 0;
3713 if (GET_CODE (reg) == SUBREG)
3715 offset = SUBREG_BYTE (reg);
3716 reg = SUBREG_REG (reg);
3718 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3719 break;
3721 case ASHIFT:
3722 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3723 break;
3727 /* Output RTL to split a constant shift into its component SH constant
3728 shift instructions. */
3729 void
3730 gen_shifty_op (int code, rtx *operands)
3732 int value = INTVAL (operands[2]);
3733 int max, i;
3735 /* Truncate the shift count in case it is out of bounds. */
3736 value = value & 31;
3738 if (value == 31)
3740 if (code == LSHIFTRT)
3742 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3743 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3744 return;
3746 else if (code == ASHIFT)
3748 /* There is a two instruction sequence for 31 bit left shifts,
3749 but it requires r0. */
3750 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3752 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3753 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3754 return;
3758 else if (value == 0)
3760 /* This can happen even when optimizing, if there were subregs before
3761 reload. Don't output a nop here, as this is never optimized away;
3762 use a no-op move instead. */
3763 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3764 return;
3767 max = ashl_lshr_seq[value].insn_count;
3768 for (i = 0; i < max; i++)
3769 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
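/* As an illustration: the SH only has constant shift insns for amounts of
   1, 2, 8 and 16 (plus dynamic shifts on some variants), so with the usual
   ashl_lshr_seq tables a left shift by 10 is emitted by the loop above as a
   shift by 8 followed by a shift by 2.  */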
3772 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3773 don't matter. */
3774 void
3775 gen_shifty_hi_op (int code, rtx *operands)
3777 int value = INTVAL (operands[2]);
3778 int max, i;
3779 void (*gen_fun) (int, int, rtx);
3781 /* This operation is used by and_shl for SImode values with a few
3782 high bits known to be cleared. */
3783 value &= 31;
3784 if (value == 0)
3786 emit_insn (gen_nop ());
3787 return;
3790 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3791 if (code == ASHIFT)
3793 max = ext_ashl_lshr_seq[value].insn_count;
3794 for (i = 0; i < max; i++)
3795 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3797 else
3798 /* When shifting right, emit the shifts in reverse order, so that
3799 solitary negative values come first. */
3800 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3801 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3804 /* Output RTL for an arithmetic right shift.
3805 ??? Rewrite to use super-optimizer sequences. */
3806 bool
3807 expand_ashiftrt (rtx *operands)
3809 rtx wrk;
3810 char func[18];
3811 int value;
3813 if (TARGET_DYNSHIFT)
3815 if (!CONST_INT_P (operands[2]))
3817 rtx count = copy_to_mode_reg (SImode, operands[2]);
3818 emit_insn (gen_negsi2 (count, count));
3819 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3820 return true;
3822 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3823 > 1 + SH_DYNAMIC_SHIFT_COST)
3825 rtx count
3826 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3827 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3828 return true;
3831 if (!CONST_INT_P (operands[2]))
3832 return false;
3834 value = INTVAL (operands[2]) & 31;
3836 if (value == 31)
3838 /* If we are called from abs expansion, arrange things so that we
3839 can use a single MT instruction that doesn't clobber the source,
3840 if LICM can hoist out the load of the constant zero. */
3841 if (currently_expanding_to_rtl)
3843 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3844 operands[1]));
3845 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3846 return true;
3848 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3849 return true;
3851 else if (value >= 16 && value <= 19)
3853 wrk = gen_reg_rtx (SImode);
3854 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3855 value -= 16;
3856 while (value--)
3857 gen_ashift (ASHIFTRT, 1, wrk);
3858 emit_move_insn (operands[0], wrk);
3859 return true;
3861 /* Expand a short sequence inline; for longer ones, call a magic routine. */
3862 else if (value <= 5)
3864 wrk = gen_reg_rtx (SImode);
3865 emit_move_insn (wrk, operands[1]);
3866 while (value--)
3867 gen_ashift (ASHIFTRT, 1, wrk);
3868 emit_move_insn (operands[0], wrk);
3869 return true;
3872 wrk = gen_reg_rtx (Pmode);
3874 /* Load the value into an arg reg and call a helper. */
3875 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3876 sprintf (func, "__ashiftrt_r4_%d", value);
3877 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3878 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3879 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3880 return true;
3883 /* Try to find a good way to implement the combiner pattern
3884 [(set (match_operand:SI 0 "register_operand" "r")
3885 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3886 (match_operand:SI 2 "const_int_operand" "n"))
3887 (match_operand:SI 3 "const_int_operand" "n"))) .
3888 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3889 return 0 for simple right / left or left/right shift combination.
3890 return 1 for a combination of shifts with zero_extend.
3891 return 2 for a combination of shifts with an AND that needs r0.
3892 return 3 for a combination of shifts with an AND that needs an extra
3893 scratch register, when the three highmost bits of the AND mask are clear.
3894 return 4 for a combination of shifts with an AND that needs an extra
3895 scratch register, when any of the three highmost bits of the AND mask
3896 is set.
3897 If ATTRP is set, store an initial right shift width in ATTRP[0],
3898 and the instruction length in ATTRP[1]. These values are not valid
3899 when returning 0.
3900 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3901 shift_amounts for the last shift value that is to be used before the
3902 sign extend. */
3904 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3906 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3907 int left = INTVAL (left_rtx), right;
3908 int best = 0;
3909 int cost, best_cost = 10000;
3910 int best_right = 0, best_len = 0;
3911 int i;
3912 int can_ext;
3914 if (left < 0 || left > 31)
3915 return 0;
3916 if (CONST_INT_P (mask_rtx))
3917 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3918 else
3919 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3920 /* Can this be expressed as a right shift / left shift pair? */
3921 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3922 right = exact_log2 (lsb);
3923 mask2 = ~(mask + lsb - 1);
3924 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3925 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3926 if (! mask2)
3927 best_cost = ashl_lshr_seq[right].insn_count
3928 + ashl_lshr_seq[right + left].insn_count;
3929 /* mask has no trailing zeroes <==> ! right */
3930 else if (! right && mask2 == ~(lsb2 - 1))
3932 int late_right = exact_log2 (lsb2);
3933 best_cost = ashl_lshr_seq[left + late_right].insn_count
3934 + ashl_lshr_seq[late_right].insn_count;
3936 /* Try to use zero extend. */
3937 if (mask2 == ~(lsb2 - 1))
3939 int width, first;
3941 for (width = 8; width <= 16; width += 8)
3943 /* Can we zero-extend right away? */
3944 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3946 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3947 + ext_ashl_lshr_seq[left + right].insn_count;
3948 if (cost < best_cost)
3950 best = 1;
3951 best_cost = cost;
3952 best_right = right;
3953 best_len = cost;
3954 if (attrp)
3955 attrp[2] = -1;
3957 continue;
3959 /* ??? Could try to put zero extend into initial right shift,
3960 or even shift a bit left before the right shift. */
3961 /* Determine value of first part of left shift, to get to the
3962 zero extend cut-off point. */
3963 first = width - exact_log2 (lsb2) + right;
3964 if (first >= 0 && right + left - first >= 0)
3966 cost = ext_ashl_lshr_seq[right].insn_count
3967 + ext_ashl_lshr_seq[first].insn_count + 1
3968 + ext_ashl_lshr_seq[right + left - first].insn_count;
3970 if (cost < best_cost)
3972 best = 1;
3973 best_cost = cost;
3974 best_right = right;
3975 best_len = cost;
3976 if (attrp)
3977 attrp[2] = first;
3982 /* Try to use the r0 AND pattern. */
3983 for (i = 0; i <= 2; i++)
3985 if (i > right)
3986 break;
3987 if (! CONST_OK_FOR_K08 (mask >> i))
3988 continue;
3989 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
3990 if (cost < best_cost)
3992 best = 2;
3993 best_cost = cost;
3994 best_right = i;
3995 best_len = cost - 1;
3998 /* Try to use a scratch register to hold the AND operand. */
3999 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
4000 for (i = 0; i <= 2; i++)
4002 if (i > right)
4003 break;
4004 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4005 + (can_ext
4006 ? ext_ashl_lshr_seq
4007 : ashl_lshr_seq)[left + i].insn_count;
4008 if (cost < best_cost)
4010 best = 4 - can_ext;
4011 best_cost = cost;
4012 best_right = i;
4013 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4017 if (attrp)
4019 attrp[0] = best_right;
4020 attrp[1] = best_len;
4022 return best;
4025 /* This is used in length attributes of the unnamed instructions
4026 corresponding to shl_and_kind return values of 1 and 2. */
4028 shl_and_length (rtx insn)
4030 rtx set_src, left_rtx, mask_rtx;
4031 int attributes[3];
4033 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4034 left_rtx = XEXP (XEXP (set_src, 0), 1);
4035 mask_rtx = XEXP (set_src, 1);
4036 shl_and_kind (left_rtx, mask_rtx, attributes);
4037 return attributes[1];
4040 /* This is used in length attribute of the and_shl_scratch instruction. */
4042 shl_and_scr_length (rtx insn)
4044 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4045 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4046 rtx op = XEXP (set_src, 0);
4047 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4048 op = XEXP (XEXP (op, 0), 0);
4049 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4052 /* Generate rtl for instructions for which shl_and_kind advised a particular
4053 method of generating them, i.e. returned zero. */
4054 bool
4055 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4057 int attributes[3];
4058 unsigned HOST_WIDE_INT mask;
4059 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4060 int right, total_shift;
4061 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4063 right = attributes[0];
4064 total_shift = INTVAL (left_rtx) + right;
4065 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4066 switch (kind)
4068 default:
4069 return true;
4070 case 1:
4072 int first = attributes[2];
4073 rtx operands[3];
4075 if (first < 0)
4077 emit_insn ((mask << right) <= 0xff
4078 ? gen_zero_extendqisi2 (dest,
4079 gen_lowpart (QImode, source))
4080 : gen_zero_extendhisi2 (dest,
4081 gen_lowpart (HImode, source)));
4082 source = dest;
4084 if (source != dest)
4085 emit_insn (gen_movsi (dest, source));
4086 operands[0] = dest;
4087 if (right)
4089 operands[2] = GEN_INT (right);
4090 gen_shifty_hi_op (LSHIFTRT, operands);
4092 if (first > 0)
4094 operands[2] = GEN_INT (first);
4095 gen_shifty_hi_op (ASHIFT, operands);
4096 total_shift -= first;
4097 mask <<= first;
4099 if (first >= 0)
4100 emit_insn (mask <= 0xff
4101 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4102 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4103 if (total_shift > 0)
4105 operands[2] = GEN_INT (total_shift);
4106 gen_shifty_hi_op (ASHIFT, operands);
4108 break;
4110 case 4:
4111 shift_gen_fun = gen_shifty_op;
4112 /* FALLTHRU */
4113 case 3:
4114 /* If the topmost bit that matters is set, set the topmost bits
4115 that don't matter. This way, we might be able to get a shorter
4116 signed constant. */
4117 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4118 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
4119 /* FALLTHRU */
4120 case 2:
4121 /* Don't expand fine-grained when combining, because that will
4122 make the pattern fail. */
4123 if (currently_expanding_to_rtl
4124 || reload_in_progress || reload_completed)
4126 rtx operands[3];
4128 /* Cases 3 and 4 should be handled by this split
4129 only while combining */
4130 gcc_assert (kind <= 2);
4131 if (right)
4133 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4134 source = dest;
4136 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4137 if (total_shift)
4139 operands[0] = dest;
4140 operands[1] = dest;
4141 operands[2] = GEN_INT (total_shift);
4142 shift_gen_fun (ASHIFT, operands);
4144 break;
4146 else
4148 int neg = 0;
4149 if (kind != 4 && total_shift < 16)
4151 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4152 if (neg > 0)
4153 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4154 else
4155 neg = 0;
4157 emit_insn (gen_and_shl_scratch (dest, source,
4158 GEN_INT (right),
4159 GEN_INT (mask),
4160 GEN_INT (total_shift + neg),
4161 GEN_INT (neg)));
4162 emit_insn (gen_movsi (dest, dest));
4163 break;
4166 return false;
4169 /* Try to find a good way to implement the combiner pattern
4170 [(set (match_operand:SI 0 "register_operand" "=r")
4171 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4172 (match_operand:SI 2 "const_int_operand" "n")
4173 (match_operand:SI 3 "const_int_operand" "n")
4174 (const_int 0)))
4175 (clobber (reg:SI T_REG))]
4176 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4177 return 0 for simple left / right shift combination.
4178 return 1 for left shift / 8 bit sign extend / left shift.
4179 return 2 for left shift / 16 bit sign extend / left shift.
4180 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4181 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4182 return 5 for left shift / 16 bit sign extend / right shift
4183 return 6 for < 8 bit sign extend / left shift.
4184 return 7 for < 8 bit sign extend / left shift / single right shift.
4185 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4187 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4189 int left, size, insize, ext;
4190 int cost = 0, best_cost;
4191 int kind;
4193 left = INTVAL (left_rtx);
4194 size = INTVAL (size_rtx);
4195 insize = size - left;
4196 gcc_assert (insize > 0);
4197 /* Default to left / right shift. */
4198 kind = 0;
4199 best_cost = ashl_lshr_seq[32 - insize].insn_count
4200 + ashl_lshr_seq[32 - size].insn_count;
4201 if (size <= 16)
4203 /* 16 bit shift / sign extend / 16 bit shift */
4204 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4205 + ashl_lshr_seq[16 - size].insn_count;
4206 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4207 below, by alternative 3 or something even better. */
4208 if (cost < best_cost)
4210 kind = 5;
4211 best_cost = cost;
4214 /* Try a plain sign extend between two shifts. */
4215 for (ext = 16; ext >= insize; ext -= 8)
4217 if (ext <= size)
4219 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4220 + ashl_lshr_seq[size - ext].insn_count;
4221 if (cost < best_cost)
4223 kind = ext / (unsigned) 8;
4224 best_cost = cost;
4227 /* Check if we can do a sloppy shift with a final signed shift
4228 restoring the sign. */
4229 if (EXT_SHIFT_SIGNED (size - ext))
4230 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4231 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4232 /* If not, maybe it's still cheaper to do the second shift sloppy,
4233 and do a final sign extend? */
4234 else if (size <= 16)
4235 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4236 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4237 + 1;
4238 else
4239 continue;
4240 if (cost < best_cost)
4242 kind = ext / (unsigned) 8 + 2;
4243 best_cost = cost;
4246 /* Check if we can sign extend in r0. */
4247 if (insize < 8)
4249 cost = 3 + ashl_lshr_seq[left].insn_count;
4250 if (cost < best_cost)
4252 kind = 6;
4253 best_cost = cost;
4255 /* Try the same with a final signed shift. */
4256 if (left < 31)
4258 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4259 if (cost < best_cost)
4261 kind = 7;
4262 best_cost = cost;
4266 if (TARGET_DYNSHIFT)
4268 /* Try to use a dynamic shift. */
4269 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4270 if (cost < best_cost)
4272 kind = 0;
4273 best_cost = cost;
4276 if (costp)
4277 *costp = cost;
4278 return kind;
4281 /* Function to be used in the length attribute of the instructions
4282 implementing this pattern. */
4284 shl_sext_length (rtx insn)
4286 rtx set_src, left_rtx, size_rtx;
4287 int cost;
4289 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4290 left_rtx = XEXP (XEXP (set_src, 0), 1);
4291 size_rtx = XEXP (set_src, 1);
4292 shl_sext_kind (left_rtx, size_rtx, &cost);
4293 return cost;
4296 /* Generate rtl for this pattern. */
4297 bool
4298 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4300 int kind;
4301 int left, size, insize, cost;
4302 rtx operands[3];
4304 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4305 left = INTVAL (left_rtx);
4306 size = INTVAL (size_rtx);
4307 insize = size - left;
4308 switch (kind)
4310 case 1:
4311 case 2:
4312 case 3:
4313 case 4:
4315 int ext = kind & 1 ? 8 : 16;
4316 int shift2 = size - ext;
4318 /* Don't expand fine-grained when combining, because that will
4319 make the pattern fail. */
4320 if (! currently_expanding_to_rtl
4321 && ! reload_in_progress && ! reload_completed)
4323 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4324 emit_insn (gen_movsi (dest, source));
4325 break;
4327 if (dest != source)
4328 emit_insn (gen_movsi (dest, source));
4329 operands[0] = dest;
4330 if (ext - insize)
4332 operands[2] = GEN_INT (ext - insize);
4333 gen_shifty_hi_op (ASHIFT, operands);
4335 emit_insn (kind & 1
4336 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4337 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4338 if (kind <= 2)
4340 if (shift2)
4342 operands[2] = GEN_INT (shift2);
4343 gen_shifty_op (ASHIFT, operands);
4346 else
4348 if (shift2 > 0)
4350 if (EXT_SHIFT_SIGNED (shift2))
4352 operands[2] = GEN_INT (shift2 + 1);
4353 gen_shifty_op (ASHIFT, operands);
4354 operands[2] = const1_rtx;
4355 gen_shifty_op (ASHIFTRT, operands);
4356 break;
4358 operands[2] = GEN_INT (shift2);
4359 gen_shifty_hi_op (ASHIFT, operands);
4361 else if (shift2)
4363 operands[2] = GEN_INT (-shift2);
4364 gen_shifty_hi_op (LSHIFTRT, operands);
4366 emit_insn (size <= 8
4367 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4368 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4370 break;
4372 case 5:
4374 int i = 16 - size;
4375 if (! currently_expanding_to_rtl
4376 && ! reload_in_progress && ! reload_completed)
4377 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4378 else
4380 operands[0] = dest;
4381 operands[2] = GEN_INT (16 - insize);
4382 gen_shifty_hi_op (ASHIFT, operands);
4383 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4385 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4386 while (--i >= 0)
4387 gen_ashift (ASHIFTRT, 1, dest);
4388 break;
4390 case 6:
4391 case 7:
4392 /* Don't expand fine-grained when combining, because that will
4393 make the pattern fail. */
4394 if (! currently_expanding_to_rtl
4395 && ! reload_in_progress && ! reload_completed)
4397 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4398 emit_insn (gen_movsi (dest, source));
4399 break;
4401 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4402 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4403 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
4404 operands[0] = dest;
4405 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4406 gen_shifty_op (ASHIFT, operands);
4407 if (kind == 7)
4408 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4409 break;
4410 default:
4411 return true;
4413 return false;
4416 typedef struct label_ref_list_d
4418 rtx_code_label *label;
4419 struct label_ref_list_d *next;
4420 } *label_ref_list_t;
4422 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4423 ("label references list");
4425 /* The SH cannot load a large constant into a register; constants have to
4426 come from a pc relative load. The reference of a pc relative load
4427 instruction must be less than 1k in front of the instruction. This
4428 means that we often have to dump a constant inside a function, and
4429 generate code to branch around it.
4431 It is important to minimize this, since the branches will slow things
4432 down and make things bigger.
4434 Worst case code looks like:
4436 mov.l L1,rn
4437 bra L2
4439 align
4440 L1: .long value
4444 mov.l L3,rn
4445 bra L4
4447 align
4448 L3: .long value
4452 We fix this by performing a scan before scheduling, which notices which
4453 instructions need to have their operands fetched from the constant table
4454 and builds the table.
4456 The algorithm is:
4458 scan, find an instruction which needs a pcrel move. Look forward, find the
4459 last barrier which is within MAX_COUNT bytes of the requirement.
4460 If there isn't one, make one. Process all the instructions between
4461 the find and the barrier.
4463 In the above example, we can tell that L3 is within 1k of L1, so
4464 the first move can be shrunk from the 3 insn+constant sequence into
4465 just 1 insn, and the constant moved to L3 to make:
4467 mov.l L1,rn
4469 mov.l L3,rn
4470 bra L4
4472 align
4473 L3:.long value
4474 L4:.long value
4476 Then the second move becomes the target for the shortening process. */
4478 typedef struct
4480 rtx value; /* Value in table. */
4481 rtx_code_label *label; /* Label of value. */
4482 label_ref_list_t wend; /* End of window. */
4483 machine_mode mode; /* Mode of value. */
4485 /* True if this constant is accessed as part of a post-increment
4486 sequence. Note that HImode constants are never accessed in this way. */
4487 bool part_of_sequence_p;
4488 } pool_node;
4490 /* The maximum number of constants that can fit into one pool, since
4491 constants in the range 0..510 are at least 2 bytes long, and in the
4492 range from there to 1018 at least 4 bytes. */
4494 #define MAX_POOL_SIZE 372
4495 static pool_node pool_vector[MAX_POOL_SIZE];
4496 static int pool_size;
4497 static rtx_code_label *pool_window_label;
4498 static int pool_window_last;
4500 static int max_labelno_before_reorg;
4502 /* ??? If we need a constant in HImode which is the truncated value of a
4503 constant we need in SImode, we could combine the two entries thus saving
4504 two bytes. Is this common enough to be worth the effort of implementing
4505 it? */
4507 /* ??? This stuff should be done at the same time that we shorten branches.
4508 As it is now, we must assume that all branches are the maximum size, and
4509 this causes us to almost always output constant pools sooner than
4510 necessary. */
4512 /* Add a constant to the pool and return its label. */
4513 static rtx_code_label *
4514 add_constant (rtx x, machine_mode mode, rtx last_value)
4516 rtx_code_label *lab, *new_rtx;
4517 label_ref_list_t ref, newref;
4519 /* First see if we've already got it. */
4520 for (int i = 0; i < pool_size; i++)
4522 if (x->code == pool_vector[i].value->code
4523 && mode == pool_vector[i].mode)
4525 if (x->code == CODE_LABEL)
4527 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4528 continue;
4530 if (rtx_equal_p (x, pool_vector[i].value))
4532 lab = new_rtx = 0;
4533 if (! last_value
4534 || ! i
4535 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4537 new_rtx = gen_label_rtx ();
4538 LABEL_REFS (new_rtx) = pool_vector[i].label;
4539 pool_vector[i].label = lab = new_rtx;
4541 if (lab && pool_window_label)
4543 newref = label_ref_list_d_pool.allocate ();
4544 newref->label = pool_window_label;
4545 ref = pool_vector[pool_window_last].wend;
4546 newref->next = ref;
4547 pool_vector[pool_window_last].wend = newref;
4549 if (new_rtx)
4550 pool_window_label = new_rtx;
4551 pool_window_last = i;
4552 return lab;
4557 /* Need a new one. */
4558 pool_vector[pool_size].value = x;
4559 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4561 lab = 0;
4562 pool_vector[pool_size - 1].part_of_sequence_p = true;
4564 else
4565 lab = gen_label_rtx ();
4566 pool_vector[pool_size].mode = mode;
4567 pool_vector[pool_size].label = lab;
4568 pool_vector[pool_size].wend = NULL;
4569 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4570 if (lab && pool_window_label)
4572 newref = label_ref_list_d_pool.allocate ();
4573 newref->label = pool_window_label;
4574 ref = pool_vector[pool_window_last].wend;
4575 newref->next = ref;
4576 pool_vector[pool_window_last].wend = newref;
4578 if (lab)
4579 pool_window_label = lab;
4580 pool_window_last = pool_size;
4581 pool_size++;
4582 return lab;
4585 /* Output the literal table. START, if nonzero, is the first instruction
4586 this table is needed for, and also indicates that there is at least one
4587 casesi_worker_2 instruction; we have to emit the operand3 labels from
4588 these insns at a 4-byte aligned position. BARRIER is the barrier
4589 after which we are to place the table. */
4590 static void
4591 dump_table (rtx_insn *start, rtx_insn *barrier)
4593 rtx_insn *scan = barrier;
4594 bool need_align = true;
4595 rtx_code_label *lab;
4596 label_ref_list_t ref;
4597 bool have_df = false;
4599 /* Do two passes; the first time, dump out the HI sized constants. */
4601 for (int i = 0; i < pool_size; i++)
4603 pool_node *p = &pool_vector[i];
4605 if (p->mode == HImode)
4607 if (need_align)
4609 scan = emit_insn_after (gen_align_2 (), scan);
4610 need_align = false;
4612 for (lab = p->label; lab;
4613 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4614 scan = emit_label_after (lab, scan);
4615 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4616 scan);
4617 for (ref = p->wend; ref; ref = ref->next)
4619 lab = ref->label;
4620 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4623 else if (p->mode == DFmode)
4624 have_df = true;
4627 need_align = true;
4629 if (start)
4631 scan = emit_insn_after (gen_align_4 (), scan);
4632 need_align = false;
4633 for (; start != barrier; start = NEXT_INSN (start))
4634 if (NONJUMP_INSN_P (start)
4635 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4637 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4638 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4640 scan = emit_label_after (as_a <rtx_insn *> (lab), scan);
4643 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4645 rtx_insn *align_insn = NULL;
4647 scan = emit_label_after (gen_label_rtx (), scan);
4648 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4649 need_align = false;
4651 for (int i = 0; i < pool_size; i++)
4653 pool_node *p = &pool_vector[i];
4655 switch (p->mode)
4657 case E_HImode:
4658 break;
4659 case E_SImode:
4660 case E_SFmode:
4661 if (align_insn && !p->part_of_sequence_p)
4663 for (lab = p->label; lab;
4664 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4665 emit_label_before (lab, align_insn);
4666 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4667 align_insn);
4668 for (ref = p->wend; ref; ref = ref->next)
4670 lab = ref->label;
4671 emit_insn_before (gen_consttable_window_end (lab),
4672 align_insn);
4674 delete_insn (align_insn);
4675 align_insn = NULL;
4676 continue;
4678 else
4680 for (lab = p->label; lab;
4681 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4682 scan = emit_label_after (lab, scan);
4683 scan = emit_insn_after (gen_consttable_4 (p->value,
4684 const0_rtx), scan);
4685 need_align = ! need_align;
4687 break;
4688 case E_DFmode:
4689 if (need_align)
4691 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4692 align_insn = scan;
4693 need_align = false;
4695 /* FALLTHRU */
4696 case E_DImode:
4697 for (lab = p->label; lab;
4698 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4699 scan = emit_label_after (lab, scan);
4700 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4701 scan);
4702 break;
4703 default:
4704 gcc_unreachable ();
4707 if (p->mode != HImode)
4709 for (ref = p->wend; ref; ref = ref->next)
4711 lab = ref->label;
4712 scan = emit_insn_after (gen_consttable_window_end (lab),
4713 scan);
4718 pool_size = 0;
4721 for (int i = 0; i < pool_size; i++)
4723 pool_node *p = &pool_vector[i];
4725 switch (p->mode)
4727 case E_HImode:
4728 break;
4729 case E_SImode:
4730 case E_SFmode:
4731 if (need_align)
4733 need_align = false;
4734 scan = emit_label_after (gen_label_rtx (), scan);
4735 scan = emit_insn_after (gen_align_4 (), scan);
4737 for (lab = p->label; lab;
4738 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4739 scan = emit_label_after (lab, scan);
4740 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4741 scan);
4742 break;
4743 case E_DFmode:
4744 case E_DImode:
4745 if (need_align)
4747 need_align = false;
4748 scan = emit_label_after (gen_label_rtx (), scan);
4749 scan = emit_insn_after (gen_align_4 (), scan);
4751 for (lab = p->label; lab;
4752 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4753 scan = emit_label_after (lab, scan);
4754 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4755 scan);
4756 break;
4757 default:
4758 gcc_unreachable ();
4761 if (p->mode != HImode)
4763 for (ref = p->wend; ref; ref = ref->next)
4765 lab = ref->label;
4766 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4771 scan = emit_insn_after (gen_consttable_end (), scan);
4772 scan = emit_barrier_after (scan);
4773 pool_size = 0;
4774 pool_window_label = NULL;
4775 pool_window_last = 0;
4778 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
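/* E.g. for a mova insn of the form
   (set (reg:SI R0) (unspec:SI [(label_ref L)] UNSPEC_MOVA))
   MOVA_LABELREF yields the (label_ref L) operand (illustrative sketch;
   mova_const has a CONST there instead).  */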
4780 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4782 /* ??? For DImode/DFmode moves, we don't need to fix them if each half of the
4783 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4784 need to fix it if the input value is CONST_OK_FOR_I08. */
4785 static bool
4786 broken_move (rtx_insn *insn)
4788 if (NONJUMP_INSN_P (insn))
4790 rtx pat = PATTERN (insn);
4791 if (GET_CODE (pat) == PARALLEL)
4792 pat = XVECEXP (pat, 0, 0);
4793 if (GET_CODE (pat) == SET
4794 /* We can load any 8-bit value if we don't care what the high
4795 order bits end up as. */
4796 && GET_MODE (SET_DEST (pat)) != QImode
4797 && (CONSTANT_P (SET_SRC (pat))
4798 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4799 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4800 /* Match mova_const. */
4801 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4802 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4803 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4804 && ! (TARGET_SH2E
4805 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4806 && (fp_zero_operand (SET_SRC (pat))
4807 || fp_one_operand (SET_SRC (pat)))
4808 /* In general we don't know the current setting of fpscr, so
4809 disable fldi.
4810 There is an exception if this was a register-register move
4811 before reload - and hence it was ascertained that we have
4812 single precision setting - and in a post-reload optimization
4813 we changed this to do a constant load. In that case
4814 we don't have an r0 clobber, hence we must use fldi. */
4815 && (TARGET_FMOVD
4816 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4817 == SCRATCH))
4818 && REG_P (SET_DEST (pat))
4819 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4820 && ! (TARGET_SH2A
4821 && GET_MODE (SET_DEST (pat)) == SImode
4822 && (satisfies_constraint_I20 (SET_SRC (pat))
4823 || satisfies_constraint_I28 (SET_SRC (pat))))
4824 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4825 return true;
4828 return false;
4831 /* Return true if the specified insn is a mova insn. */
4832 static bool
4833 mova_p (rtx_insn *insn)
4835 return (NONJUMP_INSN_P (insn)
4836 && GET_CODE (PATTERN (insn)) == SET
4837 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4838 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4839 /* Don't match mova_const. */
4840 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4843 /* Fix up a mova from a switch that went out of range. */
4844 static void
4845 fixup_mova (rtx_insn *mova)
4847 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4848 if (! flag_pic)
4850 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4851 INSN_CODE (mova) = -1;
4853 else
4855 rtx_insn *worker = mova;
4856 rtx_code_label *lab = gen_label_rtx ();
4857 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4861 worker = NEXT_INSN (worker);
4862 gcc_assert (worker
4863 && !LABEL_P (worker)
4864 && !JUMP_P (worker));
4865 } while (NOTE_P (worker)
4866 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4867 wpat = PATTERN (worker);
4868 wpat0 = XVECEXP (wpat, 0, 0);
4869 wpat1 = XVECEXP (wpat, 0, 1);
4870 wsrc = SET_SRC (wpat0);
4871 PATTERN (worker) = (gen_casesi_worker_2
4872 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4873 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4874 XEXP (wpat1, 0)));
4875 INSN_CODE (worker) = -1;
4876 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4877 base = gen_rtx_LABEL_REF (Pmode, lab);
4878 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4879 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4880 INSN_CODE (mova) = -1;
4884 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4885 *num_mova, and check if the new mova is not nested within the first one.
4886 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4887 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4888 static int
4889 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4891 int n_addr = 0; /* Initialization to shut up spurious warning. */
4892 int f_target, n_target = 0; /* Likewise. */
4894 if (optimize)
4896 /* If NEW_MOVA has no address yet, it will be handled later. */
4897 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4898 return -1;
4900 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4901 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4902 if (n_addr > n_target || n_addr + 1022 < n_target)
4904 /* Change the mova into a load.
4905 broken_move will then return true for it. */
4906 fixup_mova (new_mova);
4907 return 1;
4910 if (!(*num_mova)++)
4912 *first_mova = new_mova;
4913 return 2;
4915 if (!optimize
4916 || ((f_target
4917 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4918 >= n_target))
4919 return -1;
4921 (*num_mova)--;
4922 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4923 > n_target - n_addr)
4925 fixup_mova (*first_mova);
4926 return 0;
4928 else
4930 fixup_mova (new_mova);
4931 return 1;
4935 /* Find the last barrier from insn FROM which is close enough to hold the
4936 constant pool. If we can't find one, then create one near the end of
4937 the range. */
4938 static rtx_insn *
4939 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4941 int count_si = 0;
4942 int count_hi = 0;
4943 int found_hi = 0;
4944 int found_si = 0;
4945 int hi_align = 2;
4946 int si_align = 2;
4947 int leading_mova = num_mova;
4948 rtx_insn *barrier_before_mova = NULL;
4949 rtx_insn *found_barrier = NULL;
4950 rtx_insn *good_barrier = NULL;
4951 int si_limit;
4952 int hi_limit;
4953 rtx_insn *orig = from;
4954 rtx_insn *last_got = NULL;
4955 rtx_insn *last_symoff = NULL;
4957 /* For HImode: range is 510, add 4 because pc counts from address of
4958 second instruction after this one, subtract 2 for the jump instruction
4959 that we may need to emit before the table, subtract 2 for the instruction
4960 that fills the jump delay slot (in very rare cases, reorg will take an
4961 instruction from after the constant pool or will leave the delay slot
4962 empty). This gives 510.
4963 For SImode: range is 1020, add 4 because pc counts from address of
4964 second instruction after this one, subtract 2 in case pc is 2 byte
4965 aligned, subtract 2 for the jump instruction that we may need to emit
4966 before the table, subtract 2 for the instruction that fills the jump
4967 delay slot. This gives 1018. */
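/* Restating the arithmetic above: hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, which are the values used below.  */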
4969 /* The branch will always be shortened now that the reference address for
4970 forward branches is the successor address, so we no longer need to make
4971 adjustments to the [sh]i_limit for -O0. */
4973 si_limit = 1018;
4974 hi_limit = 510;
4976 while (from && count_si < si_limit && count_hi < hi_limit)
4978 int inc = get_attr_length (from);
4979 int new_align = 1;
4981 /* If this is a label that existed at the time of the compute_alignments
4982 call, determine the alignment. N.B. When find_barrier recurses for
4983 an out-of-reach mova, we might see labels at the start of previously
4984 inserted constant tables. */
4985 if (LABEL_P (from)
4986 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4988 if (optimize)
4989 new_align = 1 << label_to_alignment (from);
4990 else if (BARRIER_P (prev_nonnote_insn (from)))
4991 new_align = 1 << barrier_align (from);
4992 else
4993 new_align = 1;
4994 inc = 0;
4996 /* In case we are scanning a constant table because of recursion, check
4997 for explicit alignments. If the table is long, we might be forced
4998 to emit the new table in front of it; the length of the alignment
4999 might be the last straw. */
5000 else if (NONJUMP_INSN_P (from)
5001 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5002 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5003 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5004 /* When we find the end of a constant table, paste the new constant
5005 at the end. That is better than putting it in front because
5006 this way, we don't need extra alignment for adding a 4-byte-aligned
5007 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5008 else if (NONJUMP_INSN_P (from)
5009 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5010 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5011 return from;
5013 if (BARRIER_P (from))
5015 rtx_insn *next;
5017 found_barrier = from;
5019 /* If we are at the end of the function, or in front of an alignment
5020 instruction, we need not insert an extra alignment. We prefer
5021 this kind of barrier. */
5022 if (barrier_align (from) > 2)
5023 good_barrier = from;
5025 /* If we are at the end of a hot/cold block, dump the constants
5026 here. */
5027 next = NEXT_INSN (from);
5028 if (next
5029 && NOTE_P (next)
5030 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5031 break;
5034 if (broken_move (from))
5036 rtx pat, src, dst;
5037 machine_mode mode;
5039 pat = PATTERN (from);
5040 if (GET_CODE (pat) == PARALLEL)
5041 pat = XVECEXP (pat, 0, 0);
5042 src = SET_SRC (pat);
5043 dst = SET_DEST (pat);
5044 mode = GET_MODE (dst);
5046 /* The GOT pc-relative setting comes in a pair of
5047 mova .L8,r0
5048 mov.l .L8,r12
5049 instructions (plus an add r0,r12).
5050 Remember if we see one without the other. */
5051 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5052 last_got = last_got ? NULL : from;
5053 else if (PIC_ADDR_P (src))
5054 last_got = last_got ? NULL : from;
5056 /* We must explicitly check the mode, because sometimes the
5057 front end will generate code to load unsigned constants into
5058 HImode targets without properly sign extending them. */
5059 if (mode == HImode
5060 || (mode == SImode && satisfies_constraint_I16 (src)
5061 && REGNO (dst) != FPUL_REG))
5063 found_hi += 2;
5064 /* We put the short constants before the long constants, so
5065 we must count the length of short constants in the range
5066 for the long constants. */
5067 /* ??? This isn't optimal, but is easy to do. */
5068 si_limit -= 2;
5070 else
5072 /* We dump DF/DI constants before SF/SI ones, because
5073 the limit is the same, but the alignment requirements
5074 are higher. We may waste up to 4 additional bytes
5075 for alignment, and the DF/DI constant may have
5076 another SF/SI constant placed before it. */
5077 while (si_align > 2 && found_si + si_align - 2 > count_si)
5078 si_align >>= 1;
5079 if (found_si > count_si)
5080 count_si = found_si;
5081 found_si += GET_MODE_SIZE (mode);
5082 if (num_mova)
5083 si_limit -= GET_MODE_SIZE (mode);
5087 if (mova_p (from))
5089 switch (untangle_mova (&num_mova, &mova, from))
5091 case 1:
5092 if (flag_pic)
5094 rtx src = SET_SRC (PATTERN (from));
5095 if (GET_CODE (src) == CONST
5096 && GET_CODE (XEXP (src, 0)) == UNSPEC
5097 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5098 last_symoff = from;
5100 break;
5101 case 0: return find_barrier (0, 0, mova);
5102 case 2:
5104 leading_mova = 0;
5105 barrier_before_mova
5106 = good_barrier ? good_barrier : found_barrier;
5108 default: break;
5110 if (found_si > count_si)
5111 count_si = found_si;
5113 else if (JUMP_TABLE_DATA_P (from)
5114 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5116 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5117 || (num_mova
5118 && (prev_nonnote_insn (from)
5119 == XEXP (MOVA_LABELREF (mova), 0))))
5120 num_mova--;
5121 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5123 /* We have just passed the barrier in front of the
5124 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5125 the ADDR_DIFF_VEC is accessed as data, just like our pool
5126 constants, this is a good opportunity to accommodate what
5127 we have gathered so far.
5128 If we waited any longer, we could end up at a barrier in
5129 front of code, which gives worse cache usage for separated
5130 instruction / data caches. */
5131 good_barrier = found_barrier;
5132 break;
5134 else
5136 rtx body = PATTERN (from);
5137 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5140 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5141 else if (JUMP_P (from)
5142 && ! TARGET_SH2
5143 && ! optimize_size)
5144 new_align = 4;
5146 /* There is a possibility that a bf is transformed into a bf/s by the
5147 delay slot scheduler. */
5148 if (JUMP_P (from)
5149 && get_attr_type (from) == TYPE_CBRANCH
5150 && ! sequence_insn_p (from))
5151 inc += 2;
5153 if (found_si)
5155 count_si += inc;
5156 if (new_align > si_align)
5158 si_limit -= (count_si - 1) & (new_align - si_align);
5159 si_align = new_align;
5161 count_si = (count_si + new_align - 1) & -new_align;
5163 if (found_hi)
5165 count_hi += inc;
5166 if (new_align > hi_align)
5168 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5169 hi_align = new_align;
5171 count_hi = (count_hi + new_align - 1) & -new_align;
5173 from = NEXT_INSN (from);
5176 if (num_mova)
5178 if (leading_mova)
5180 /* Try as we might, the leading mova is out of range. Change
5181 it into a load (which will become a pcload) and retry. */
5182 fixup_mova (mova);
5183 return find_barrier (0, 0, mova);
5185 else
5187 /* Insert the constant pool table before the mova instruction,
5188 to prevent the mova label reference from going out of range. */
5189 from = mova;
5190 good_barrier = found_barrier = barrier_before_mova;
5194 if (found_barrier)
5196 if (good_barrier && next_real_insn (found_barrier))
5197 found_barrier = good_barrier;
5199 else
5201 /* We didn't find a barrier in time to dump our stuff,
5202 so we'll make one. */
5203 rtx_code_label *label = gen_label_rtx ();
5205 /* Don't emit a constant table in the middle of insns for
5206 casesi_worker_2. This is a bit of overkill, but it is enough
5207 because casesi_worker_2 doesn't appear very frequently. */
5208 if (last_symoff)
5209 from = last_symoff;
5211 /* If we exceeded the range, then we must back up over the last
5212 instruction we looked at. Otherwise, we just need to undo the
5213 NEXT_INSN at the end of the loop. */
5214 if (PREV_INSN (from) != orig
5215 && (count_hi > hi_limit || count_si > si_limit))
5216 from = PREV_INSN (PREV_INSN (from));
5217 else
5218 from = PREV_INSN (from);
5220 /* Don't emit a constant table in the middle of global pointer setting,
5221 since that would move the addressing base GOT into another table.
5222 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5223 in the pool anyway, so just move up the whole constant pool.
5225 However, avoid doing so when the last single GOT mov is the starting
5226 insn itself. Going back past the start insn would create a negative
5227 offset, causing errors. */
5228 if (last_got && last_got != orig)
5229 from = PREV_INSN (last_got);
5231 /* Don't insert the constant pool table at the position which
5232 may be the landing pad. */
5233 if (flag_exceptions
5234 && CALL_P (from)
5235 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5236 from = PREV_INSN (from);
5238 /* Walk back to be just before any jump or label.
5239 Putting it before a label reduces the number of times the branch
5240 around the constant pool table will be hit. Putting it before
5241 a jump makes it more likely that the bra delay slot will be
5242 filled. */
5243 while (NOTE_P (from) || JUMP_P (from) || LABEL_P (from))
5244 from = PREV_INSN (from);
5246 if (CALL_P (from))
5248 bool sibcall_p = SIBLING_CALL_P (from);
5250 /* If FROM was a sibling call, then we know that control
5251 will not return. In fact, we were guaranteed to hit
5252 a barrier before another real insn.
5254 The jump around the constant pool is unnecessary. It
5255 costs space, but more importantly it confuses dwarf2cfi
5256 generation. */
5257 if (sibcall_p)
5258 return emit_barrier_after (from);
5261 from = emit_jump_insn_after (gen_jump (label), from);
5262 JUMP_LABEL (from) = label;
5263 LABEL_NUSES (label) = 1;
5264 found_barrier = emit_barrier_after (from);
5265 emit_label_after (label, found_barrier);
5268 return found_barrier;
5271 /* If the instruction INSN is implemented by a special function, and we can
5272 positively find the register that is used to call the sfunc, and this
5273 register is not used anywhere else in this instruction - except as the
5274 destination of a set, return this register; else, return 0. */
5276 sfunc_uses_reg (rtx_insn *insn)
5278 int i;
5279 rtx pattern, part, reg_part, reg;
5281 if (!NONJUMP_INSN_P (insn))
5282 return NULL_RTX;
5283 pattern = PATTERN (insn);
5284 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5285 return NULL_RTX;
5287 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5289 part = XVECEXP (pattern, 0, i);
5290 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5291 reg_part = part;
5293 if (! reg_part)
5294 return NULL_RTX;
5295 reg = XEXP (reg_part, 0);
5296 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5298 part = XVECEXP (pattern, 0, i);
5299 if (part == reg_part || GET_CODE (part) == CLOBBER)
5300 continue;
5301 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5302 && REG_P (SET_DEST (part)))
5303 ? SET_SRC (part) : part)))
5304 return NULL_RTX;
5306 return reg;
5309 /* See if the only way in which INSN uses REG is by calling it, or by
5310 setting it while calling it. Set *SET to a SET rtx if the register
5311 is set by INSN. */
5312 static bool
5313 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5315 *set = NULL_RTX;
5317 rtx reg2 = sfunc_uses_reg (insn);
5318 if (reg2 && REGNO (reg2) == REGNO (reg))
5320 rtx pattern = single_set (insn);
5321 if (pattern
5322 && REG_P (SET_DEST (pattern))
5323 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5324 *set = pattern;
5325 return false;
5327 if (!CALL_P (insn))
5329 /* We don't use rtx_equal_p because we don't care if the mode is
5330 different. */
5331 rtx pattern = single_set (insn);
5332 if (pattern
5333 && REG_P (SET_DEST (pattern))
5334 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5336 rtx par, part;
5337 int i;
5339 *set = pattern;
5340 par = PATTERN (insn);
5341 if (GET_CODE (par) == PARALLEL)
5342 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5344 part = XVECEXP (par, 0, i);
5345 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5346 return true;
5348 return reg_mentioned_p (reg, SET_SRC (pattern));
5351 return true;
5354 rtx pattern = PATTERN (insn);
5356 if (GET_CODE (pattern) == PARALLEL)
5358 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5359 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5360 return true;
5361 pattern = XVECEXP (pattern, 0, 0);
5364 if (GET_CODE (pattern) == SET)
5366 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5368 /* We don't use rtx_equal_p, because we don't care if the
5369 mode is different. */
5370 if (!REG_P (SET_DEST (pattern))
5371 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5372 return true;
5374 *set = pattern;
5377 pattern = SET_SRC (pattern);
5380 if (GET_CODE (pattern) != CALL
5381 || !MEM_P (XEXP (pattern, 0))
5382 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5383 return true;
5385 return false;
5388 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5389 general registers. Bits 0..15 mean that the respective registers
5390 are used as inputs in the instruction. Bits 16..31 mean that the
5391 registers 0..15, respectively, are used as outputs, or are clobbered.
5392 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
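/* For example (an illustrative reading of the code below): for
   (set (reg:SI 1) (reg:SI 2)) the result is (1 << (1 + 16)) | (1 << 2)
   == 0x20004, and a DImode source register r2 contributes the two bits 0xc,
   since it occupies r2 and r3.  */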
5394 regs_used (rtx x, int is_dest)
5396 enum rtx_code code;
5397 const char *fmt;
5398 int used = 0;
5400 if (! x)
5401 return used;
5402 code = GET_CODE (x);
5403 switch (code)
5405 case REG:
5406 if (REGNO (x) < 16)
5407 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5408 << (REGNO (x) + is_dest));
5409 return 0;
5410 case SUBREG:
5412 rtx y = SUBREG_REG (x);
5414 if (!REG_P (y))
5415 break;
5416 if (REGNO (y) < 16)
5417 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5418 << (REGNO (y) +
5419 subreg_regno_offset (REGNO (y),
5420 GET_MODE (y),
5421 SUBREG_BYTE (x),
5422 GET_MODE (x)) + is_dest));
5423 return 0;
5425 case SET:
5426 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5427 case RETURN:
5428 /* If there was a return value, it must have been indicated with USE. */
5429 return 0x00ffff00;
5430 case CLOBBER:
5431 is_dest = 1;
5432 break;
5433 case MEM:
5434 is_dest = 0;
5435 break;
5436 case CALL:
5437 used |= 0x00ff00f0;
5438 break;
5439 default:
5440 break;
5443 fmt = GET_RTX_FORMAT (code);
5445 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5447 if (fmt[i] == 'E')
5449 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5450 used |= regs_used (XVECEXP (x, i, j), is_dest);
5452 else if (fmt[i] == 'e')
5453 used |= regs_used (XEXP (x, i), is_dest);
5455 return used;
5458 /* Create an instruction that prevents redirection of a conditional branch
5459 to the destination of the JUMP with address ADDR.
5460 If the branch needs to be implemented as an indirect jump, try to find
5461 a scratch register for it.
5462 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5463 If any preceding insn that doesn't fit into a delay slot is good enough,
5464 pass 1. Pass 2 if a definite blocking insn is needed.
5465 -1 is used internally to avoid deep recursion.
5466 If a blocking instruction is made or recognized, return it. */
5467 static rtx_insn *
5468 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5470 int dead = 0;
5471 rtx_insn *prev = prev_nonnote_insn (jump);
5473 /* First, check if we already have an instruction that satisfies our need. */
5474 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5476 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5477 return prev;
5478 if (GET_CODE (PATTERN (prev)) == USE
5479 || GET_CODE (PATTERN (prev)) == CLOBBER
5480 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5481 prev = jump;
5482 else if ((need_block &= ~1) < 0)
5483 return prev;
5484 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5485 need_block = 0;
5487 if (GET_CODE (PATTERN (jump)) == RETURN)
5489 if (! need_block)
5490 return prev;
5491 /* Reorg even does nasty things with return insns that cause branches
5492 to go out of range - see find_end_label and callers. */
5493 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5495 /* We can't use JUMP_LABEL here because it might be undefined
5496 when not optimizing. */
5497 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5498 /* If the branch is out of range, try to find a scratch register for it. */
5499 if (optimize
5500 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5501 > 4092 + 4098))
5503 rtx_insn *scan;
5504 /* Don't look for the stack pointer as a scratch register;
5505 it would cause trouble if an interrupt occurred. */
5506 unsigned attempt = 0x7fff, used;
5507 int jump_left = flag_expensive_optimizations + 1;
5509 /* It is likely that the most recent eligible instruction is wanted for
5510 the delay slot. Therefore, find out which registers it uses, and
5511 try to avoid using them. */
5513 for (scan = jump; (scan = PREV_INSN (scan)); )
5515 if (scan->deleted ())
5516 continue;
5517 rtx_code code = GET_CODE (scan);
5518 if (code == CODE_LABEL || code == JUMP_INSN)
5519 break;
5520 if (code == INSN
5521 && GET_CODE (PATTERN (scan)) != USE
5522 && GET_CODE (PATTERN (scan)) != CLOBBER
5523 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5525 attempt &= ~regs_used (PATTERN (scan), 0);
5526 break;
5529 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5530 (scan = NEXT_INSN (scan)); )
5532 if (scan->deleted ())
5533 continue;
5534 rtx_code code = GET_CODE (scan);
5535 if (INSN_P (scan))
5537 used |= regs_used (PATTERN (scan), 0);
5538 if (code == CALL_INSN)
5539 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5540 dead |= (used >> 16) & ~used;
5541 if (dead & attempt)
5543 dead &= attempt;
5544 break;
5546 if (code == JUMP_INSN)
5548 if (jump_left-- && simplejump_p (scan))
5549 scan = JUMP_LABEL_AS_INSN (scan);
5550 else
5551 break;
5555 /* Mask out the stack pointer again, in case it was
5556 the only 'free' register we have found. */
5557 dead &= 0x7fff;
5559 /* If the immediate destination is still in range, check for possible
5560 threading with a jump beyond the delay slot insn.
5561 Don't check if we are called recursively; the jump has been or will be
5562 checked in a different invocation then. */
5564 else if (optimize && need_block >= 0)
5566 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
5567 next = next_active_insn (next);
5568 if (next && JUMP_P (next)
5569 && GET_CODE (PATTERN (next)) == SET
5570 && recog_memoized (next) == CODE_FOR_jump_compact)
5572 dest = JUMP_LABEL (next);
5573 if (dest
5574 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5575 > 4092 + 4098))
5576 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5580 if (dead)
5582 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5584 /* It would be nice if we could convert the jump into an indirect
5585 jump / far branch right now, thus exposing all constituent
5586 instructions to further optimization. However, reorg uses
5587 simplejump_p to determine if there is an unconditional jump where
5588 it should try to schedule instructions from the target of the
5589 branch; simplejump_p fails for indirect jumps even if they have
5590 a JUMP_LABEL. */
5591 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5592 (reg, GEN_INT (unspec_bbr_uid++)),
5593 jump);
5594 /* ??? We would like this to have the scope of the jump, but that
5595 scope will change when a delay slot insn of an inner scope is added.
5596 Hence, after delay slot scheduling, we'll have to expect
5597 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5598 the jump. */
5600 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5601 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5602 return insn;
5604 else if (need_block)
5605 /* We can't use JUMP_LABEL here because it might be undefined
5606 when not optimizing. */
5607 return emit_insn_before (gen_block_branch_redirect
5608 (GEN_INT (unspec_bbr_uid++)),
5609 jump);
5610 return prev;
5613 #define CONDJUMP_MIN -252
5614 #define CONDJUMP_MAX 262
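/* These bound the byte displacement, relative to the conditional branch,
   that split_branches below still treats as reachable with a short bt/bf
   when it decides where a near label or far-branch stub must be placed.
   The values appear to come from the 8-bit, 2-byte-scaled displacement of
   those insns plus some slack for the reference point used there. */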
5615 struct far_branch
5617 /* A label (to be placed) in front of the jump
5618 that jumps to our ultimate destination. */
5619 rtx_insn *near_label;
5620 /* Where we are going to insert it if we cannot move the jump any farther,
5621 or the jump itself if we have picked up an existing jump. */
5622 rtx_insn *insert_place;
5623 /* The ultimate destination. */
5624 rtx_insn *far_label;
5625 struct far_branch *prev;
5626 /* If the branch has already been created, its address;
5627 else the address of its first prospective user. */
5628 int address;
5631 enum mdep_reorg_phase_e mdep_reorg_phase;
5633 static void
5634 gen_far_branch (struct far_branch *bp)
5636 rtx_insn *insn = bp->insert_place;
5637 rtx_jump_insn *jump;
5638 rtx_code_label *label = gen_label_rtx ();
5640 emit_label_after (label, insn);
5641 if (bp->far_label)
5643 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5644 LABEL_NUSES (bp->far_label)++;
5646 else
5647 jump = emit_jump_insn_after (gen_return (), insn);
5649 /* Emit a barrier so that reorg knows that any following instructions
5650 are not reachable via a fall-through path.
5651 But don't do this when not optimizing, since we wouldn't suppress the
5652 alignment for the barrier then, and could end up with out-of-range
5653 pc-relative loads. */
5654 if (optimize)
5655 emit_barrier_after (jump);
5656 emit_label_after (bp->near_label, insn);
5658 if (bp->far_label)
5659 JUMP_LABEL (jump) = bp->far_label;
5660 else
5662 rtx pat = PATTERN (jump);
5663 gcc_assert (ANY_RETURN_P (pat));
5664 JUMP_LABEL (jump) = pat;
5667 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5668 gcc_assert (ok);
5670 /* If we are branching around a jump (rather than a return), prevent
5671 reorg from using an insn from the jump target as the delay slot insn -
5672 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5673 and it could cause branches to go out of range. */
5674 if (bp->far_label)
5675 (emit_insn_after
5676 (gen_stuff_delay_slot
5677 (GEN_INT (unspec_bbr_uid++),
5678 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5679 insn));
5680 /* Prevent reorg from undoing our splits. */
5681 gen_block_redirect (jump, bp->address += 2, 2);
5684 /* Fix up ADDR_DIFF_VECs. */
5685 void
5686 fixup_addr_diff_vecs (rtx_insn *first)
5688 rtx_insn *insn;
5690 for (insn = first; insn; insn = NEXT_INSN (insn))
5692 rtx vec_lab, pat, prevpat, x, braf_label;
5693 rtx_insn *prev;
5695 if (! JUMP_TABLE_DATA_P (insn)
5696 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5697 continue;
5698 pat = PATTERN (insn);
5699 vec_lab = XEXP (XEXP (pat, 0), 0);
5701 /* Search the matching casesi_jump_2. */
5702 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5704 if (!JUMP_P (prev))
5705 continue;
5706 prevpat = PATTERN (prev);
5707 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5708 continue;
5709 x = XVECEXP (prevpat, 0, 1);
5710 if (GET_CODE (x) != USE)
5711 continue;
5712 x = XEXP (x, 0);
5713 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5714 break;
5716 /* FIXME: This is a bug in the optimizer, but it seems harmless
5717 to just avoid panicking. */
5718 if (!prev)
5719 continue;
5721 /* Emit the reference label of the braf where it belongs, right after
5722 the casesi_jump_2 (i.e. braf). */
5723 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5724 emit_label_after (as_a <rtx_insn *> (braf_label), prev);
5726 /* Fix up the ADDR_DIFF_VEC to be relative
5727 to the reference address of the braf. */
5728 XEXP (XEXP (pat, 0), 0) = braf_label;
5732 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5733 a barrier. Return the base 2 logarithm of the desired alignment. */
5734 int
5735 barrier_align (rtx_insn *barrier_or_label)
5737 if (! barrier_or_label)
5738 return 0;
5740 if (LABEL_P (barrier_or_label)
5741 && NEXT_INSN (barrier_or_label)
5742 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5743 return 2;
5745 if (BARRIER_P (barrier_or_label)
5746 && PREV_INSN (barrier_or_label)
5747 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5749 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5750 /* If this is a very small table, we want to keep the alignment after
5751 the table to the minimum for proper code alignment. */
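/* The left-hand side is the table size in bytes (entry count times entry
   size); 1 << (CACHE_LOG - 2) is a quarter of a cache line, assuming
   CACHE_LOG is the log2 of the cache line size. */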
5752 return ((optimize_size
5753 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5754 <= (unsigned) 1 << (CACHE_LOG - 2)))
5755 ? 1 : align_jumps_log);
5758 rtx_insn *next = next_active_insn (barrier_or_label);
5760 if (! next)
5761 return 0;
5763 rtx pat = PATTERN (next);
5765 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5766 /* This is a barrier in front of a constant table. */
5767 return 0;
5769 if (optimize_size)
5770 return 0;
5772 if (! TARGET_SH2 || ! optimize)
5773 return align_jumps_log;
5775 /* When fixing up pcloads, a constant table might be inserted just before
5776 the basic block that ends with the barrier. Thus, we can't trust the
5777 instruction lengths before that. */
5778 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5780 /* Check if there is an immediately preceding branch to the insn beyond
5781 the barrier. We must weight the cost of discarding useful information
5782 from the current cache line when executing this branch and there is
5783 an alignment, against that of fetching unneeded insn in front of the
5784 branch target when there is no alignment. */
5786 /* There are two delay_slot cases to consider. One is the simple case
5787 where the preceding branch is to the insn beyond the barrier (simple
5788 delay slot filling), and the other is where the preceding branch has
5789 a delay slot that is a duplicate of the insn after the barrier
5790 (fill_eager_delay_slots) and the branch is to the insn after the insn
5791 after the barrier. */
5793 int slot, credit;
5794 bool jump_to_next = false;
5796 /* Skip to the insn before the JUMP_INSN before the barrier under
5797 investigation. */
5798 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5800 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5801 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5802 prev = prev_real_insn (prev))
5804 jump_to_next = false;
5805 if (GET_CODE (PATTERN (prev)) == USE
5806 || GET_CODE (PATTERN (prev)) == CLOBBER)
5807 continue;
5808 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5810 prev = prev_seq->insn (1);
5811 if (INSN_UID (prev) == INSN_UID (next))
5813 /* Delay slot was filled with insn at jump target. */
5814 jump_to_next = true;
5815 continue;
5819 if (slot
5820 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5821 slot = 0;
5822 credit -= get_attr_length (prev);
5824 if (prev && jump_to_label_p (prev))
5826 rtx_insn *x;
5827 if (jump_to_next
5828 || next_real_insn (JUMP_LABEL_AS_INSN (prev)) == next
5829 /* If relax_delay_slots() decides NEXT was redundant
5830 with some previous instruction, it will have
5831 redirected PREV's jump to the following insn. */
5832 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5833 /* There is no upper bound on redundant instructions
5834 that might have been skipped, but we must not put an
5835 alignment where none had been before. */
5836 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5837 (INSN_P (x)
5838 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5839 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5840 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5842 rtx pat = PATTERN (prev);
5843 if (GET_CODE (pat) == PARALLEL)
5844 pat = XVECEXP (pat, 0, 0);
5845 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5846 return 0;
5851 return align_jumps_log;
5854 /* If we are inside a phony loop, almost any kind of label can turn up as the
5855 first one in the loop. Aligning a braf label causes incorrect switch
5856 destination addresses; we can detect braf labels because they are
5857 followed by a BARRIER.
5858 Applying loop alignment to small constant or switch tables is a waste
5859 of space, so we suppress this too. */
5860 int
5861 sh_loop_align (rtx_insn *label)
5863 rtx_insn *next = label;
5865 if (! optimize || optimize_size)
5866 return 0;
5868 do
5869 next = next_nonnote_insn (next);
5870 while (next && LABEL_P (next));
5872 if (! next
5873 || ! INSN_P (next)
5874 || recog_memoized (next) == CODE_FOR_consttable_2)
5875 return 0;
5877 return align_loops_log;
5880 /* Do a final pass over the function, just before delayed branch
5881 scheduling. */
5882 static void
5883 sh_reorg (void)
5885 rtx_insn *first, *insn, *mova = NULL;
5886 int num_mova;
5887 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5888 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5890 first = get_insns ();
5891 max_labelno_before_reorg = max_label_num ();
5893 /* We must split call insns before introducing `mova's. If we're
5894 optimizing, they'll have already been split. Otherwise, make
5895 sure we don't split them too late. */
5896 if (! optimize)
5897 split_all_insns_noflow ();
5899 /* If relaxing, generate pseudo-ops to associate function calls with
5900 the symbols they call. It does no harm to not generate these
5901 pseudo-ops. However, when we can generate them, it enables the
5902 linker to potentially relax the jsr to a bsr, and eliminate the
5903 register load and, possibly, the constant pool entry. */
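/* The pseudo-ops in question are the labels and .uses directives that
   final_prescan_insn emits further down, keyed off the REG_LABEL_OPERAND
   notes created here. */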
5905 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5906 if (TARGET_RELAX)
5908 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5909 own purposes. This works because none of the remaining passes
5910 need to look at them.
5912 ??? But it may break in the future. We should use a machine
5913 dependent REG_NOTE, or some other approach entirely. */
5914 for (insn = first; insn; insn = NEXT_INSN (insn))
5916 if (INSN_P (insn))
5918 rtx note;
5920 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5921 NULL_RTX)) != 0)
5922 remove_note (insn, note);
5926 for (insn = first; insn; insn = NEXT_INSN (insn))
5928 rtx pattern, reg, set, dies;
5929 rtx_code_label *label;
5930 rtx_insn *link, *scan;
5931 int rescan = 0, foundinsn = 0;
5933 if (CALL_P (insn))
5935 pattern = PATTERN (insn);
5937 if (GET_CODE (pattern) == PARALLEL)
5938 pattern = XVECEXP (pattern, 0, 0);
5939 if (GET_CODE (pattern) == SET)
5940 pattern = SET_SRC (pattern);
5942 if (GET_CODE (pattern) != CALL
5943 || !MEM_P (XEXP (pattern, 0)))
5944 continue;
5946 reg = XEXP (XEXP (pattern, 0), 0);
5948 else
5950 reg = sfunc_uses_reg (insn);
5951 if (! reg)
5952 continue;
5955 if (!REG_P (reg))
5956 continue;
5958 /* Try scanning backward to find where the register is set. */
5959 link = NULL;
5960 for (scan = PREV_INSN (insn);
5961 scan && !LABEL_P (scan);
5962 scan = PREV_INSN (scan))
5964 if (! INSN_P (scan))
5965 continue;
5967 if (! reg_mentioned_p (reg, scan))
5968 continue;
5970 if (noncall_uses_reg (reg, scan, &set))
5971 break;
5973 if (set)
5975 link = scan;
5976 break;
5980 if (! link)
5981 continue;
5983 /* The register is set at LINK. */
5985 /* We can only optimize the function call if the register is
5986 being set to a symbol. In theory, we could sometimes
5987 optimize calls to a constant location, but the assembler
5988 and linker do not support that at present. */
5989 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5990 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5991 continue;
5993 /* Scan forward from LINK to the place where REG dies, and
5994 make sure that the only insns which use REG are
5995 themselves function calls. */
5997 /* ??? This doesn't work for call targets that were allocated
5998 by reload, since there may not be a REG_DEAD note for the
5999 register. */
6001 dies = NULL_RTX;
6002 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6004 rtx scanset;
6006 /* Don't try to trace forward past a CODE_LABEL if we haven't
6007 seen INSN yet. Ordinarily, we will only find the setting insn
6008 if it is in the same basic block. However,
6009 cross-jumping can insert code labels in between the load and
6010 the call, and can result in situations where a single call
6011 insn may have two targets depending on where we came from. */
6013 if (LABEL_P (scan) && ! foundinsn)
6014 break;
6016 if (! INSN_P (scan))
6017 continue;
6019 /* Don't try to trace forward past a JUMP. To optimize
6020 safely, we would have to check that all the
6021 instructions at the jump destination did not use REG. */
6023 if (JUMP_P (scan))
6024 break;
6026 if (! reg_mentioned_p (reg, scan))
6027 continue;
6029 if (noncall_uses_reg (reg, scan, &scanset))
6030 break;
6032 if (scan == insn)
6033 foundinsn = 1;
6035 if (scan != insn
6036 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6038 /* There is a function call to this register other
6039 than the one we are checking. If we optimize
6040 this call, we need to rescan again below. */
6041 rescan = 1;
6044 /* ??? We shouldn't have to worry about SCANSET here.
6045 We should just be able to check for a REG_DEAD note
6046 on a function call. However, the REG_DEAD notes are
6047 apparently not dependable around libcalls; c-torture
6048 execute/920501-2 is a test case. If SCANSET is set,
6049 then this insn sets the register, so it must have
6050 died earlier. Unfortunately, this will only handle
6051 the cases in which the register is, in fact, set in a
6052 later insn. */
6054 /* ??? We shouldn't have to use FOUNDINSN here.
6055 This dates back to when we used LOG_LINKS to find
6056 the most recent insn which sets the register. */
6058 if (foundinsn
6059 && (scanset
6060 || find_reg_note (scan, REG_DEAD, reg)))
6062 dies = scan;
6063 break;
6067 if (! dies)
6069 /* Either there was a branch, or some insn used REG
6070 other than as a function call address. */
6071 continue;
6074 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6075 on the insn which sets the register, and on each call insn
6076 which uses the register. In final_prescan_insn we look for
6077 the REG_LABEL_OPERAND notes, and output the appropriate label
6078 or pseudo-op. */
6080 label = gen_label_rtx ();
6081 add_reg_note (link, REG_LABEL_OPERAND, label);
6082 add_reg_note (insn, REG_LABEL_OPERAND, label);
6083 if (rescan)
6085 scan = link;
6088 rtx reg2;
6090 scan = NEXT_INSN (scan);
6091 if (scan != insn
6092 && ((CALL_P (scan)
6093 && reg_mentioned_p (reg, scan))
6094 || ((reg2 = sfunc_uses_reg (scan))
6095 && REGNO (reg2) == REGNO (reg))))
6096 add_reg_note (scan, REG_LABEL_OPERAND, label);
6098 while (scan != dies);
6103 if (TARGET_SH2)
6104 fixup_addr_diff_vecs (first);
6106 if (optimize)
6108 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6109 shorten_branches (first);
6112 /* Scan the function looking for move instructions which have to be
6113 changed to pc-relative loads and insert the literal tables. */
6114 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6115 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6117 if (mova_p (insn))
6119 /* ??? basic block reordering can move a switch table dispatch
6120 below the switch table. Check if that has happened.
6121 We only have the addresses available when optimizing; but then,
6122 this check shouldn't be needed when not optimizing. */
6123 if (!untangle_mova (&num_mova, &mova, insn))
6125 insn = mova;
6126 num_mova = 0;
6129 else if (JUMP_TABLE_DATA_P (insn)
6130 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6131 && num_mova
6132 /* ??? loop invariant motion can also move a mova out of a
6133 loop. Since loop does this code motion anyway, maybe we
6134 should wrap UNSPEC_MOVA into a CONST, so that reload can
6135 move it back. */
6136 && ((num_mova > 1
6137 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6138 || (prev_nonnote_insn (insn)
6139 == XEXP (MOVA_LABELREF (mova), 0))))
6141 rtx_insn *scan;
6142 int total;
6144 num_mova--;
6146 /* Some code might have been inserted between the mova and
6147 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6148 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6149 total += get_attr_length (scan);
6151 /* range of mova is 1020, add 4 because pc counts from address of
6152 second instruction after this one, subtract 2 in case pc is 2
6153 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6154 cancels out with alignment effects of the mova itself. */
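/* That is 1020 + 4 - 2 = 1022, the limit tested below. */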
6155 if (total > 1022)
6157 /* Change the mova into a load, and restart scanning
6158 there. broken_move will then return true for mova. */
6159 fixup_mova (mova);
6160 insn = mova;
6163 if (broken_move (insn)
6164 || (NONJUMP_INSN_P (insn)
6165 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6167 rtx_insn *scan;
6168 /* Scan ahead looking for a barrier to stick the constant table
6169 behind. */
6170 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6171 rtx_insn *last_float_move = NULL;
6172 rtx last_float = 0, *last_float_addr = NULL;
6173 int need_aligned_label = 0;
6175 if (num_mova && ! mova_p (mova))
6177 /* find_barrier had to change the first mova into a
6178 pcload; thus, we have to start with this new pcload. */
6179 insn = mova;
6180 num_mova = 0;
6182 /* Now find all the moves between the points and modify them. */
6183 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6185 if (LABEL_P (scan))
6186 last_float = 0;
6187 if (NONJUMP_INSN_P (scan)
6188 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6189 need_aligned_label = 1;
6190 if (broken_move (scan))
6192 rtx *patp = &PATTERN (scan), pat = *patp;
6193 rtx src, dst;
6194 rtx lab;
6195 rtx newsrc;
6196 machine_mode mode;
6198 if (GET_CODE (pat) == PARALLEL)
6199 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6200 src = SET_SRC (pat);
6201 dst = SET_DEST (pat);
6202 mode = GET_MODE (dst);
6204 if (mode == SImode && satisfies_constraint_I16 (src)
6205 && REGNO (dst) != FPUL_REG)
6207 int offset = 0;
6209 mode = HImode;
6210 while (GET_CODE (dst) == SUBREG)
6212 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6213 GET_MODE (SUBREG_REG (dst)),
6214 SUBREG_BYTE (dst),
6215 GET_MODE (dst));
6216 dst = SUBREG_REG (dst);
6218 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6220 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6222 /* This must be an insn that clobbers r0. */
6223 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6224 XVECLEN (PATTERN (scan), 0)
6225 - 1);
6226 rtx clobber = *clobberp;
6228 gcc_assert (GET_CODE (clobber) == CLOBBER
6229 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6231 if (last_float
6232 && reg_set_between_p (r0_rtx, last_float_move, scan))
6233 last_float = 0;
6234 lab = add_constant (src, mode, last_float);
6235 if (lab)
6236 emit_insn_before (gen_mova (lab), scan);
6237 else
6239 /* There will be a REG_UNUSED note for r0 on
6240 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6241 otherwise reorg:mark_target_live_regs will not
6242 consider r0 to be used, and we end up with a delay
6243 slot insn in front of SCAN that clobbers r0. */
6244 rtx note
6245 = find_regno_note (last_float_move, REG_UNUSED, 0);
6247 /* If we are not optimizing, then there may not be
6248 a note. */
6249 if (note)
6250 PUT_REG_NOTE_KIND (note, REG_INC);
6252 *last_float_addr = r0_inc_rtx;
6254 last_float_move = scan;
6255 last_float = src;
6256 newsrc = gen_const_mem (mode,
6257 (((TARGET_SH4 && ! TARGET_FMOVD)
6258 || REGNO (dst) == FPUL_REG)
6259 ? r0_inc_rtx
6260 : r0_rtx));
6261 last_float_addr = &XEXP (newsrc, 0);
6263 /* Remove the clobber of r0. */
6264 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6265 gen_rtx_SCRATCH (Pmode));
6267 /* This is a mova needing a label. Create it. */
6268 else if (GET_CODE (src) == UNSPEC
6269 && XINT (src, 1) == UNSPEC_MOVA
6270 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6272 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6273 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6274 newsrc = gen_rtx_UNSPEC (SImode,
6275 gen_rtvec (1, newsrc),
6276 UNSPEC_MOVA);
6278 else if (GET_CODE (src) == UNSPEC_VOLATILE
6279 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6281 newsrc = XVECEXP (src, 0, 0);
6282 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6283 INSN_CODE (scan) = -1;
6284 continue;
6286 else
6288 lab = add_constant (src, mode, 0);
6289 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6290 newsrc = gen_const_mem (mode, newsrc);
6292 *patp = gen_rtx_SET (dst, newsrc);
6293 INSN_CODE (scan) = -1;
6296 dump_table (need_aligned_label ? insn : 0, barrier);
6297 insn = barrier;
6300 label_ref_list_d_pool.release ();
6301 for (insn = first; insn; insn = NEXT_INSN (insn))
6302 PUT_MODE (insn, VOIDmode);
6304 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6305 INSN_ADDRESSES_FREE ();
6306 split_branches (first);
6308 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6309 also has an effect on the register that holds the address of the sfunc.
6310 Insert an extra dummy insn in front of each sfunc that pretends to
6311 use this register. */
6312 if (flag_delayed_branch)
6314 for (insn = first; insn; insn = NEXT_INSN (insn))
6316 rtx reg = sfunc_uses_reg (insn);
6318 if (! reg)
6319 continue;
6320 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6323 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6326 /* Return the UID of the insn that follows the specified label. */
6327 static int
6328 get_dest_uid (rtx_insn *label, int max_uid)
6330 rtx_insn *dest = next_real_insn (label);
6332 if (! dest)
6333 /* This can happen for an undefined label. */
6334 return 0;
6335 int dest_uid = INSN_UID (dest);
6336 /* If this is a newly created branch redirection blocking instruction,
6337 we cannot index the branch_uid or insn_addresses arrays with its
6338 uid. But then, we won't need to, because the actual destination is
6339 the following branch. */
6340 while (dest_uid >= max_uid)
6342 dest = NEXT_INSN (dest);
6343 dest_uid = INSN_UID (dest);
6345 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6346 return 0;
6347 return dest_uid;
6350 /* Split condbranches that are out of range. Also add clobbers for
6351 scratch registers that are needed in far jumps.
6352 We do this before delay slot scheduling, so that it can take our
6353 newly created instructions into account. It also allows us to
6354 find branches with common targets more easily. */
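/* For reference, the mechanics: gen_far_branch above inverts such a branch
   so that it conditionally skips over an unconditional jump (or return) to
   the real target; the near label placed in front of that jump can then be
   shared by other short branches to the same destination. */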
6355 static void
6356 split_branches (rtx_insn *first)
6358 rtx_insn *insn;
6359 struct far_branch **uid_branch, *far_branch_list = 0;
6360 int max_uid = get_max_uid ();
6361 int ok;
6363 /* Find out which branches are out of range. */
6364 shorten_branches (first);
6366 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6367 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6369 for (insn = first; insn; insn = NEXT_INSN (insn))
6370 if (! INSN_P (insn))
6371 continue;
6372 else if (insn->deleted ())
6374 /* Shorten_branches would split this instruction again,
6375 so transform it into a note. */
6376 SET_INSN_DELETED (insn);
6378 else if (JUMP_P (insn))
6380 enum attr_type type = get_attr_type (insn);
6381 if (type == TYPE_CBRANCH)
6383 rtx_insn *next, *beyond;
6385 if (get_attr_length (insn) > 4)
6387 rtx src = SET_SRC (PATTERN (insn));
6388 rtx_insn *olabel = safe_as_a <rtx_insn *> (XEXP (XEXP (src, 1), 0));
6389 int addr = INSN_ADDRESSES (INSN_UID (insn));
6390 rtx_insn *label = 0;
6391 int dest_uid = get_dest_uid (olabel, max_uid);
6392 struct far_branch *bp = uid_branch[dest_uid];
6394 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6395 the label if the LABEL_NUSES count drops to zero. There is
6396 always a jump_optimize pass that sets these values, but it
6397 proceeds to delete unreferenced code, and then if not
6398 optimizing, to un-delete the deleted instructions, thus
6399 leaving labels with too low uses counts. */
6400 if (! optimize)
6402 JUMP_LABEL (insn) = olabel;
6403 LABEL_NUSES (olabel)++;
6405 if (! bp)
6407 bp = (struct far_branch *) alloca (sizeof *bp);
6408 uid_branch[dest_uid] = bp;
6409 bp->prev = far_branch_list;
6410 far_branch_list = bp;
6411 bp->far_label = as_a <rtx_insn *> (
6412 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6413 0));
6414 LABEL_NUSES (bp->far_label)++;
6416 else
6418 label = bp->near_label;
6419 if (! label && bp->address - addr >= CONDJUMP_MIN)
6421 rtx_insn *block = bp->insert_place;
6423 if (GET_CODE (PATTERN (block)) == RETURN)
6424 block = PREV_INSN (block);
6425 else
6426 block = gen_block_redirect (block,
6427 bp->address, 2);
6428 label = emit_label_after (gen_label_rtx (),
6429 PREV_INSN (block));
6430 bp->near_label = label;
6432 else if (label && ! NEXT_INSN (label))
6434 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6435 bp->insert_place = insn;
6436 else
6437 gen_far_branch (bp);
6440 if (! label
6441 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6443 bp->near_label = label = gen_label_rtx ();
6444 bp->insert_place = insn;
6445 bp->address = addr;
6447 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6448 gcc_assert (ok);
6450 else
6452 /* get_attr_length (insn) == 2 */
6453 /* Check if we have a pattern where reorg wants to redirect
6454 the branch to a label from an unconditional branch that
6455 is too far away. */
6456 /* We can't use JUMP_LABEL here because it might be undefined
6457 when not optimizing. */
6458 /* A syntax error might cause beyond to be NULL_RTX. */
6459 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6460 beyond = next_active_insn (as_a<rtx_insn *> (temp));
6462 if (beyond
6463 && (JUMP_P (beyond)
6464 || ((beyond = next_active_insn (beyond))
6465 && JUMP_P (beyond)))
6466 && GET_CODE (PATTERN (beyond)) == SET
6467 && recog_memoized (beyond) == CODE_FOR_jump_compact
6468 && ((INSN_ADDRESSES
6469 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6470 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6471 > 252 + 258 + 2))
6472 gen_block_redirect (beyond,
6473 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6476 next = next_active_insn (insn);
6478 if (next
6479 && (JUMP_P (next)
6480 || ((next = next_active_insn (next))
6481 && JUMP_P (next)))
6482 && GET_CODE (PATTERN (next)) == SET
6483 && recog_memoized (next) == CODE_FOR_jump_compact
6484 && ((INSN_ADDRESSES
6485 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6486 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6487 > 252 + 258 + 2))
6488 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6490 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6492 int addr = INSN_ADDRESSES (INSN_UID (insn));
6493 rtx_insn *far_label = 0;
6494 int dest_uid = 0;
6495 struct far_branch *bp;
6497 if (type == TYPE_JUMP)
6499 if (CROSSING_JUMP_P (insn))
6501 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6502 insn);
6503 continue;
6506 far_label = as_a <rtx_insn *> (
6507 XEXP (SET_SRC (PATTERN (insn)), 0));
6508 dest_uid = get_dest_uid (far_label, max_uid);
6509 if (! dest_uid)
6511 /* Parse errors can lead to labels outside
6512 the insn stream. */
6513 if (! NEXT_INSN (far_label))
6514 continue;
6516 if (! optimize)
6518 JUMP_LABEL (insn) = far_label;
6519 LABEL_NUSES (far_label)++;
6521 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6522 far_label = 0;
6525 bp = uid_branch[dest_uid];
6526 if (! bp)
6528 bp = (struct far_branch *) alloca (sizeof *bp);
6529 uid_branch[dest_uid] = bp;
6530 bp->prev = far_branch_list;
6531 far_branch_list = bp;
6532 bp->near_label = 0;
6533 bp->far_label = far_label;
6534 if (far_label)
6535 LABEL_NUSES (far_label)++;
6537 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6538 if (addr - bp->address <= CONDJUMP_MAX)
6539 emit_label_after (bp->near_label, PREV_INSN (insn));
6540 else
6542 gen_far_branch (bp);
6543 bp->near_label = 0;
6545 else
6546 bp->near_label = 0;
6547 bp->address = addr;
6548 bp->insert_place = insn;
6549 if (! far_label)
6550 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6551 else
6552 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6555 /* Generate all pending far branches,
6556 and free our references to the far labels. */
6557 while (far_branch_list)
6559 if (far_branch_list->near_label
6560 && ! NEXT_INSN (far_branch_list->near_label))
6561 gen_far_branch (far_branch_list);
6562 if (optimize
6563 && far_branch_list->far_label
6564 && ! --LABEL_NUSES (far_branch_list->far_label))
6565 delete_insn (far_branch_list->far_label);
6566 far_branch_list = far_branch_list->prev;
6569 /* Instruction length information is no longer valid due to the new
6570 instructions that have been generated. */
6571 init_insn_lengths ();
6574 /* Dump out instruction addresses, which is useful for debugging the
6575 constant pool table stuff.
6577 If relaxing, output the label and pseudo-ops used to link together
6578 calls and the instruction which set the registers.
6580 ??? The addresses printed by this routine for insns are nonsense for
6581 insns which are inside of a sequence where none of the inner insns have
6582 variable length. This is because the second pass of shorten_branches
6583 does not bother to update them. */
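/* With TARGET_DUMPISIZE each insn is preceded by a "! at xxxx" comment
   giving its (possibly stale, per the caveat above) address. */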
6584 void
6585 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6586 int noperands ATTRIBUTE_UNUSED)
6588 if (TARGET_DUMPISIZE)
6589 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6591 if (TARGET_RELAX)
6593 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6595 rtx pattern = PATTERN (insn);
6596 if (GET_CODE (pattern) == PARALLEL)
6597 pattern = XVECEXP (pattern, 0, 0);
6598 switch (GET_CODE (pattern))
6600 case SET:
6601 if (GET_CODE (SET_SRC (pattern)) != CALL
6602 && get_attr_type (insn) != TYPE_SFUNC)
6604 targetm.asm_out.internal_label
6605 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6606 break;
6608 /* FALLTHROUGH */
6609 case CALL:
6610 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6611 CODE_LABEL_NUMBER (XEXP (note, 0)));
6612 break;
6614 default:
6615 gcc_unreachable ();
6621 /* Dump out any constants accumulated in the final pass. These will
6622 only be labels. */
6623 const char *
6624 output_jump_label_table (void)
6626 if (pool_size)
6628 fprintf (asm_out_file, "\t.align 2\n");
6629 for (int i = 0; i < pool_size; i++)
6631 pool_node *p = &pool_vector[i];
6633 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6634 CODE_LABEL_NUMBER (p->label));
6635 output_asm_insn (".long %O0", &p->value);
6637 pool_size = 0;
6640 return "";
6643 /* A full frame looks like:
6645 arg-5
6646 arg-4
6647 [ if current_function_anonymous_args
6648 arg-3
6649 arg-2
6650 arg-1
6651 arg-0 ]
6652 saved-fp
6653 saved-r10
6654 saved-r11
6655 saved-r12
6656 saved-pr
6657 local-n
6659 local-1
6660 local-0 <- fp points here.
6662 Number of bytes pushed for anonymous args, used to pass information
6663 between expand_prologue and expand_epilogue.
6665 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6666 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6667 for an epilogue and a negative value means that it's for a sibcall
6668 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6669 all the registers that are about to be restored, and hence dead. */
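/* Within this file, sh_expand_prologue calls this with a negative SIZE to
   allocate frame space and sh_expand_epilogue with a positive SIZE to
   release it again. */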
6670 static void
6671 output_stack_adjust (int size, rtx reg, int epilogue_p,
6672 HARD_REG_SET *live_regs_mask, bool frame_p)
6674 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6675 if (size)
6677 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6679 /* This test is bogus, as output_stack_adjust is used to re-align the
6680 stack. */
6681 #if 0
6682 gcc_assert (!(size % align));
6683 #endif
6685 if (CONST_OK_FOR_ADD (size))
6686 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6687 /* Try to do it with two partial adjustments; however, we must make
6688 sure that the stack is properly aligned at all times, in case
6689 an interrupt occurs between the two partial adjustments. */
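/* For instance, size == 200 with 4 byte alignment becomes two "add #100"
   style adjustments; the first half is rounded down to a multiple of the
   alignment so that the intermediate stack pointer stays aligned. */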
6690 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6691 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6693 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6694 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6696 else
6698 rtx const_reg;
6699 rtx insn;
6700 int temp = epilogue_p ? 7 : 1;
6701 int i;
6703 /* If TEMP is invalid, we could temporarily save a general
6704 register to MACL. However, there is currently no need
6705 to handle this case, so just die when we see it. */
6706 if (epilogue_p < 0
6707 || current_function_interrupt
6708 || ! call_really_used_regs[temp] || fixed_regs[temp])
6709 temp = -1;
6710 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6712 HARD_REG_SET temps;
6713 COPY_HARD_REG_SET (temps, call_used_reg_set);
6714 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6715 if (epilogue_p > 0)
6717 int nreg = 0;
6718 if (crtl->return_rtx)
6720 machine_mode mode;
6721 mode = GET_MODE (crtl->return_rtx);
6722 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6723 nreg = hard_regno_nregs (FIRST_RET_REG, mode);
6725 for (i = 0; i < nreg; i++)
6726 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6727 if (crtl->calls_eh_return)
6729 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6730 for (i = 0; i <= 3; i++)
6731 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6734 if (epilogue_p <= 0)
6736 for (i = FIRST_PARM_REG;
6737 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6738 CLEAR_HARD_REG_BIT (temps, i);
6739 if (cfun->static_chain_decl != NULL)
6740 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6742 temp = scavenge_reg (&temps);
6744 if (temp < 0 && live_regs_mask)
6746 HARD_REG_SET temps;
6748 COPY_HARD_REG_SET (temps, *live_regs_mask);
6749 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6750 temp = scavenge_reg (&temps);
6752 if (temp < 0)
6754 rtx adj_reg, tmp_reg, mem;
6756 /* If we reached here, the most likely case is the (sibcall)
6757 epilogue. Put a special push/pop sequence for such a case as
6758 the last resort. This looks lengthy but would not be a problem
6759 because it seems to be very rare. */
6760 gcc_assert (epilogue_p);
6762 /* ??? There is still the slight possibility that r4 or
6763 r5 have been reserved as fixed registers or assigned
6764 as global registers, and they change during an
6765 interrupt. There are possible ways to handle this:
6767 - If we are adjusting the frame pointer (r14), we can do
6768 with a single temp register and an ordinary push / pop
6769 on the stack.
6770 - Grab any call-used or call-saved registers (i.e. not
6771 fixed or globals) for the temps we need. We might
6772 also grab r14 if we are adjusting the stack pointer.
6773 If we can't find enough available registers, issue
6774 a diagnostic and die - the user must have reserved
6775 way too many registers.
6776 But since all this is rather unlikely to happen and
6777 would require extra testing, we just die if r4 / r5
6778 are not available. */
6779 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6780 && !global_regs[4] && !global_regs[5]);
6782 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6783 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6784 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6785 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6786 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6787 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6788 emit_move_insn (mem, tmp_reg);
6789 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6790 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6791 emit_move_insn (mem, tmp_reg);
6792 emit_move_insn (reg, adj_reg);
6793 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6794 emit_move_insn (adj_reg, mem);
6795 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6796 emit_move_insn (tmp_reg, mem);
6797 /* Tell flow the insns that pop r4/r5 aren't dead. */
6798 emit_use (tmp_reg);
6799 emit_use (adj_reg);
6800 return;
6802 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6804 /* If SIZE is negative, subtract the positive value.
6805 This sometimes allows a constant pool entry to be shared
6806 between prologue and epilogue code. */
6807 if (size < 0)
6809 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6810 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6812 else
6814 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6815 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6817 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6818 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6819 GEN_INT (size))));
6824 /* Emit the specified insn and mark it as frame related. */
6825 static rtx_insn *
6826 emit_frame_insn (rtx x)
6828 rtx_insn *insn = emit_insn (x);
6829 RTX_FRAME_RELATED_P (insn) = 1;
6830 return insn;
6833 /* Output RTL to push register RN onto the stack. */
6834 static rtx
6835 push (int rn)
6837 rtx x;
6838 if (rn == FPUL_REG)
6839 x = gen_push_fpul ();
6840 else if (rn == FPSCR_REG)
6841 x = gen_push_fpscr ();
6842 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6843 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6845 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6846 return NULL_RTX;
6847 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6849 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6850 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6851 else
6852 x = gen_push (gen_rtx_REG (SImode, rn));
6854 x = emit_frame_insn (x);
6855 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6856 return x;
6859 /* Output RTL to pop register RN from the stack. */
6860 static void
6861 pop (int rn)
6863 rtx x, sp_reg, reg;
6864 if (rn == FPUL_REG)
6865 x = gen_pop_fpul ();
6866 else if (rn == FPSCR_REG)
6867 x = gen_pop_fpscr ();
6868 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6869 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6871 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6872 return;
6873 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6875 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6876 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6877 else
6878 x = gen_pop (gen_rtx_REG (SImode, rn));
6880 x = emit_insn (x);
6882 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6883 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6884 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6885 : SET_DEST (PATTERN (x)));
6886 add_reg_note (x, REG_CFA_RESTORE, reg);
6887 add_reg_note (x, REG_CFA_ADJUST_CFA,
6888 gen_rtx_SET (sp_reg,
6889 plus_constant (SImode, sp_reg,
6890 GET_MODE_SIZE (GET_MODE (reg)))));
6891 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6892 RTX_FRAME_RELATED_P (x) = 1;
6895 /* Generate code to push the regs specified in the mask. */
6896 static void
6897 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6899 bool skip_fpscr = false;
6901 /* Push PR last; this gives better latencies after the prologue, and
6902 candidates for the return delay slot when there are no general
6903 registers pushed. */
6904 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6905 i < FIRST_PSEUDO_REGISTER; i++)
6907 /* If this is an interrupt handler, and the SZ bit varies,
6908 and we have to push any floating point register, we need
6909 to switch to the correct precision first. */
6910 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6911 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6913 HARD_REG_SET unsaved;
6915 push (FPSCR_REG);
6916 COMPL_HARD_REG_SET (unsaved, *mask);
6917 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6918 skip_fpscr = true;
6920 if (i != PR_REG
6921 && (i != FPSCR_REG || ! skip_fpscr)
6922 && TEST_HARD_REG_BIT (*mask, i))
6924 /* If the ISR has RESBANK attribute assigned, don't push any of
6925 the following registers - R0-R14, MACH, MACL and GBR. */
6926 if (! (sh_cfun_resbank_handler_p ()
6927 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6928 || i == MACH_REG
6929 || i == MACL_REG
6930 || i == GBR_REG)))
6931 push (i);
6935 /* Push banked registers last to improve delay slot opportunities. */
6936 if (interrupt_handler)
6938 bool use_movml = false;
6940 if (TARGET_SH2A)
6942 unsigned int count = 0;
6944 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6945 if (TEST_HARD_REG_BIT (*mask, i))
6946 count++;
6947 else
6948 break;
6950 /* Use movml when all banked registers are pushed. */
6951 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6952 use_movml = true;
6955 if (sh_cfun_resbank_handler_p ())
6956 ; /* Do nothing. */
6957 else if (use_movml)
6959 rtx x, mem, reg, set;
6960 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6962 /* We must avoid scheduling the multiple-store insn together
6963 with other insns. */
6964 emit_insn (gen_blockage ());
6965 x = gen_movml_push_banked (sp_reg);
6966 x = emit_frame_insn (x);
6967 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6969 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6970 reg = gen_rtx_REG (SImode, i);
6971 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6974 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6975 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6976 emit_insn (gen_blockage ());
6978 else
6979 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6980 if (TEST_HARD_REG_BIT (*mask, i))
6981 push (i);
6984 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6985 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6986 push (PR_REG);
6989 /* Work out the registers which need to be saved, both as a mask and a
6990 count of saved bytes. Return the count.
6992 If doing a pragma interrupt function, then push all regs used by the
6993 function, and if we call another function (we can tell by looking at PR),
6994 make sure that all the regs it clobbers are safe too. */
6995 static int
6996 calc_live_regs (HARD_REG_SET *live_regs_mask)
6998 unsigned int reg;
6999 tree attrs;
7000 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
7001 bool nosave_low_regs;
7003 attrs = DECL_ATTRIBUTES (current_function_decl);
7004 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7005 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7006 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7007 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7009 CLEAR_HARD_REG_SET (*live_regs_mask);
7010 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
7011 && df_regs_ever_live_p (FPSCR_REG))
7012 target_flags &= ~MASK_FPU_SINGLE;
7013 /* If we can avoid a lot of saves by switching to double mode, do that. */
7014 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
7015 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7016 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7017 && (! call_really_used_regs[reg]
7018 || interrupt_handler)
7019 && ++count > 2)
7021 target_flags &= ~MASK_FPU_SINGLE;
7022 break;
7026 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7027 bool pr_live = (pr_initial
7028 ? (!REG_P (pr_initial)
7029 || REGNO (pr_initial) != (PR_REG))
7030 : df_regs_ever_live_p (PR_REG));
7031 /* For Shcompact, if not optimizing, we end up with a memory reference
7032 using the return address pointer for __builtin_return_address even
7033 though there is no actual need to put the PR register on the stack. */
7034 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7036 /* Force PR to be live if the prologue has to call the SHmedia
7037 argument decoder or register saver. */
7038 bool has_call = pr_live;
7040 int count;
7041 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7043 if (reg == PR_REG
7044 ? pr_live
7045 : interrupt_handler
7046 ? (/* Need to save all the regs ever live. */
7047 (df_regs_ever_live_p (reg)
7048 || (call_really_used_regs[reg]
7049 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7050 || reg == PIC_OFFSET_TABLE_REGNUM)
7051 && has_call))
7052 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7053 && reg != RETURN_ADDRESS_POINTER_REGNUM
7054 && reg != T_REG && reg != GBR_REG
7055 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7056 /* Push fpscr only on targets which have an FPU. */
7057 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7058 : (/* Only push those regs which are used and need to be saved. */
7059 (false)
7060 || (df_regs_ever_live_p (reg)
7061 && ((!call_really_used_regs[reg]
7062 && !(reg != PIC_OFFSET_TABLE_REGNUM
7063 && fixed_regs[reg] && call_used_regs[reg]))
7064 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7065 || (crtl->calls_eh_return
7066 && (reg == EH_RETURN_DATA_REGNO (0)
7067 || reg == EH_RETURN_DATA_REGNO (1)
7068 || reg == EH_RETURN_DATA_REGNO (2)
7069 || reg == EH_RETURN_DATA_REGNO (3)))
7070 || ((reg == MACL_REG || reg == MACH_REG)
7071 && df_regs_ever_live_p (reg)
7072 && sh_cfun_attr_renesas_p ())
7075 SET_HARD_REG_BIT (*live_regs_mask, reg);
7076 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7078 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7079 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7081 if (FP_REGISTER_P (reg))
7083 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7085 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7086 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7089 else if (XD_REGISTER_P (reg))
7091 /* Must switch to double mode to access these registers. */
7092 target_flags &= ~MASK_FPU_SINGLE;
7096 if (nosave_low_regs && reg == R8_REG)
7097 break;
7100 return count;
7103 /* Code to generate prologue and epilogue sequences */
7105 /* PUSHED is the number of bytes that are being pushed on the
7106 stack for register saves. Return the frame size, padded
7107 appropriately so that the stack stays properly aligned. */
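/* For example, with a frame size of 10, PUSHED == 4 and a 4 byte stack
   alignment this returns ((10 + 4 + 3) & -4) - 4 = 12, so that PUSHED
   plus the returned size stays a multiple of the alignment. */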
7108 static HOST_WIDE_INT
7109 rounded_frame_size (int pushed)
7111 HOST_WIDE_INT size = get_frame_size ();
7112 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7114 if (ACCUMULATE_OUTGOING_ARGS)
7115 size += crtl->outgoing_args_size;
7117 return ((size + pushed + align - 1) & -align) - pushed;
7120 /* Expand code for the function prologue. */
7121 void
7122 sh_expand_prologue (void)
7124 int save_flags = target_flags;
7125 tree sp_switch_attr
7126 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7128 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7130 /* We have pretend args if we had an object sent partially in registers
7131 and partially on the stack, e.g. a large structure. */
7132 int pretend_args = crtl->args.pretend_args_size;
7133 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7134 && (NPARM_REGS(SImode)
7135 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7136 pretend_args = 0;
7138 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7139 int stack_usage = pretend_args;
7141 /* Emit the code for SETUP_VARARGS. */
7142 if (cfun->stdarg)
7144 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7146 /* Push arg regs as if they'd been provided by the caller on the stack. */
7147 for (int i = 0; i < NPARM_REGS(SImode); i++)
7149 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7151 if (i >= (NPARM_REGS(SImode)
7152 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7154 break;
7155 push (rn);
7156 stack_usage += GET_MODE_SIZE (SImode);
7161 /* If we're supposed to switch stacks at function entry, do so now. */
7162 if (sp_switch_attr)
7164 rtx lab, newsrc;
7165 /* The argument specifies a variable holding the address of the
7166 stack the interrupt function should switch to/from at entry/exit. */
7167 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7168 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7169 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7171 lab = add_constant (sp_switch, SImode, 0);
7172 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7174 emit_insn (gen_sp_switch_1 (newsrc));
7177 HARD_REG_SET live_regs_mask;
7178 int d = calc_live_regs (&live_regs_mask);
7179 /* ??? Maybe we could save some switching if we can move a mode switch
7180 that already happens to be at the function start into the prologue. */
7181 if (target_flags != save_flags && ! current_function_interrupt)
7182 emit_insn (gen_toggle_sz ());
7184 push_regs (&live_regs_mask, current_function_interrupt);
7185 stack_usage += d;
7187 if (flag_pic && !TARGET_FDPIC
7188 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7189 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7191 if (target_flags != save_flags && ! current_function_interrupt)
7192 emit_insn (gen_toggle_sz ());
7194 target_flags = save_flags;
7196 output_stack_adjust (-rounded_frame_size (d),
7197 stack_pointer_rtx, 0, NULL, true);
7198 stack_usage += rounded_frame_size (d);
7200 if (frame_pointer_needed)
7201 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7203 /* If we are profiling, make sure no instructions are scheduled before
7204 the call to mcount. Similarly if some call instructions are swapped
7205 before frame related insns, it'll confuse the unwinder because
7206 currently SH has no unwind info for function epilogues. */
7207 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7208 emit_insn (gen_blockage ());
7210 if (flag_stack_usage_info)
7211 current_function_static_stack_size = stack_usage;
7214 /* Expand code for the function epilogue. */
7215 void
7216 sh_expand_epilogue (bool sibcall_p)
7218 int save_flags = target_flags;
7219 bool fpscr_deferred = false;
7220 int e = sibcall_p ? -1 : 1;
7222 HARD_REG_SET live_regs_mask;
7223 int d = calc_live_regs (&live_regs_mask);
7225 int save_size = d;
7226 int frame_size = rounded_frame_size (d);
7228 if (frame_pointer_needed)
7230 /* We must avoid scheduling the epilogue with previous basic blocks.
7231 See PR/18032 and PR/40313. */
7232 emit_insn (gen_blockage ());
7233 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7234 &live_regs_mask, true);
7236 /* We must avoid moving the stack pointer adjustment past code
7237 which reads from the local frame, else an interrupt could
7238 occur after the SP adjustment and clobber data in the local
7239 frame. */
7240 emit_insn (gen_blockage ());
7241 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7243 else if (frame_size)
7245 /* We must avoid moving the stack pointer adjustment past code
7246 which reads from the local frame, else an interrupt could
7247 occur after the SP adjustment and clobber data in the local
7248 frame. */
7249 emit_insn (gen_blockage ());
7250 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7251 &live_regs_mask, true);
7254 /* Pop all the registers. */
7256 if (target_flags != save_flags && ! current_function_interrupt)
7257 emit_insn (gen_toggle_sz ());
7260 int last_reg;
7262 save_size = 0;
7263 /* For an ISR with RESBANK attribute assigned, don't pop PR
7264 register. */
7265 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7266 && !sh_cfun_resbank_handler_p ())
7268 if (!frame_pointer_needed)
7269 emit_insn (gen_blockage ());
7270 pop (PR_REG);
7273 /* Banked registers are popped first to avoid being scheduled in the
7274 delay slot. RTE switches banks before the ds instruction. */
7275 if (current_function_interrupt)
7277 bool use_movml = false;
7279 if (TARGET_SH2A)
7281 unsigned int count = 0;
7283 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7284 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7285 count++;
7286 else
7287 break;
7289 /* Use movml when all banked registers are popped. */
7290 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7291 use_movml = true;
7294 if (sh_cfun_resbank_handler_p ())
7295 ; /* Do nothing. */
7296 else if (use_movml)
7298 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7300 /* We must avoid scheduling the multiple-load insn together
7301 with other insns. */
7302 emit_insn (gen_blockage ());
7303 emit_insn (gen_movml_pop_banked (sp_reg));
7304 emit_insn (gen_blockage ());
7306 else
7307 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7308 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7309 pop (i);
7311 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7313 else
7314 last_reg = FIRST_PSEUDO_REGISTER;
7316 for (int i = 0; i < last_reg; i++)
7318 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7320 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7321 && hard_reg_set_intersect_p (live_regs_mask,
7322 reg_class_contents[DF_REGS]))
7323 fpscr_deferred = true;
7324 /* For an ISR with RESBANK attribute assigned, don't pop
7325 following registers, R0-R14, MACH, MACL and GBR. */
7326 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7327 && ! (sh_cfun_resbank_handler_p ()
7328 && ((j >= FIRST_GENERAL_REG
7329 && j < LAST_GENERAL_REG)
7330 || j == MACH_REG
7331 || j == MACL_REG
7332 || j == GBR_REG)))
7333 pop (j);
7335 if (j == FIRST_FP_REG && fpscr_deferred)
7336 pop (FPSCR_REG);
7339 if (target_flags != save_flags && ! current_function_interrupt)
7340 emit_insn (gen_toggle_sz ());
7341 target_flags = save_flags;
7343 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7344 stack_pointer_rtx, e, NULL, true);
7346 if (crtl->calls_eh_return)
7347 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7348 EH_RETURN_STACKADJ_RTX));
7350 /* Switch back to the normal stack if necessary. */
7351 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7352 emit_insn (gen_sp_switch_2 ());
7354 /* Tell flow the insn that pops PR isn't dead. */
7355 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7356 emit_use (gen_rtx_REG (SImode, PR_REG));
7359 /* Emit code to change the current function's return address to RA.
7360 TEMP is available as a scratch register, if needed. */
7361 void
7362 sh_set_return_address (rtx ra, rtx tmp)
7364 HARD_REG_SET live_regs_mask;
7365 int d = calc_live_regs (&live_regs_mask);
7367 /* If pr_reg isn't live, we can set it directly. */
7368 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7370 rtx rr = gen_rtx_REG (SImode, PR_REG);
7371 emit_insn (GEN_MOV (rr, ra));
7372 /* Tell flow the register for return isn't dead. */
7373 emit_use (rr);
7374 return;
7377 int pr_offset = rounded_frame_size (d);
7379 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7381 if (frame_pointer_needed)
7382 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7383 else
7384 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7386 tmp = gen_frame_mem (Pmode, tmp);
7387 emit_insn (GEN_MOV (tmp, ra));
7388 /* Tell flow this store isn't dead. */
7389 emit_use (tmp);
7392 /* Clear variables at function end. */
7393 static void
7394 sh_output_function_epilogue (FILE *)
7398 static rtx
7399 sh_builtin_saveregs (void)
7401 /* First unnamed integer register. */
7402 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7403 /* Number of integer registers we need to save. */
7404 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7405 /* First unnamed SFmode float reg */
7406 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7407 /* Number of SFmode float regs to save. */
7408 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7409 rtx regbuf, fpregs;
7410 int bufsize, regno;
7411 alias_set_type alias_set;
7413 if (!TARGET_FPU_ANY)
7415 error ("__builtin_saveregs not supported by this subtarget");
7416 return const0_rtx;
7419 /* Allocate block of memory for the regs. */
7420 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7421 Or can assign_stack_local accept a 0 SIZE argument? */
7422 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7424 if (n_floatregs & 1)
7426 rtx addr;
7428 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7429 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7430 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7431 regbuf = change_address (regbuf, BLKmode, addr);
7433 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7435 rtx addr, mask;
7437 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7438 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7439 XEXP (regbuf, 0), 4));
7440 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7441 emit_insn (gen_andsi3 (addr, addr, mask));
7442 regbuf = change_address (regbuf, BLKmode, addr);
7444 else
7445 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7446 alias_set = get_varargs_alias_set ();
7447 set_mem_alias_set (regbuf, alias_set);
7449 /* Save int args.
7450 This is optimized to only save the regs that are necessary. Explicitly
7451 named args need not be saved. */
7452 if (n_intregs > 0)
7453 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7454 adjust_address (regbuf, BLKmode,
7455 n_floatregs * UNITS_PER_WORD),
7456 n_intregs);
7458 /* Save float args.
7459 This is optimized to only save the regs that are necessary. Explicitly
7460 named args need not be saved.
7461 We explicitly build a pointer to the buffer because it halves the insn
7462 count when not optimizing (otherwise the pointer is built for each reg
7463 saved).
7464 We emit the moves in reverse order so that we can use predecrement. */
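/* Illustrative note (not from the original code): after the two save steps,
   REGBUF holds the unnamed FP argument registers at its lowest addresses,
   followed by the unnamed integer argument registers. sh_va_start below
   points __va_next_fp at the start of the buffer and __va_next_o just past
   the FP save area. */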
7466 fpregs = copy_to_mode_reg (Pmode,
7467 plus_constant (Pmode, XEXP (regbuf, 0),
7468 n_floatregs * UNITS_PER_WORD));
7469 if (TARGET_FPU_DOUBLE)
7471 rtx mem;
7472 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7474 emit_insn (gen_addsi3 (fpregs, fpregs,
7475 GEN_INT (-2 * UNITS_PER_WORD)));
7476 mem = change_address (regbuf, DFmode, fpregs);
7477 emit_move_insn (mem,
7478 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7480 regno = first_floatreg;
7481 if (regno & 1)
7483 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7484 mem = change_address (regbuf, SFmode, fpregs);
7485 emit_move_insn (mem,
7486 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7487 + regno - SH_REG_MSW_OFFSET));
7490 else
7491 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7493 rtx mem;
7495 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7496 mem = change_address (regbuf, SFmode, fpregs);
7497 emit_move_insn (mem,
7498 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7501 /* Return the address of the regbuf. */
7502 return XEXP (regbuf, 0);
7505 /* Define the `__builtin_va_list' type for the ABI. */
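/* Roughly, the record built below corresponds to the following C layout
   (an illustrative sketch for the non-Renesas SH2E/SH4 case; the field names
   match the FIELD_DECLs created below):

     struct __va_list_tag
     {
       void *__va_next_o;        -- next unnamed integer arg register slot
       void *__va_next_o_limit;  -- end of the integer register save area
       void *__va_next_fp;       -- next unnamed FP arg register slot
       void *__va_next_fp_limit; -- end of the FP register save area
       void *__va_next_stack;    -- next stack-passed argument
     };  */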
7506 static tree
7507 sh_build_builtin_va_list (void)
7509 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7510 tree record, type_decl;
7512 if ((! TARGET_SH2E && ! TARGET_SH4)
7513 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7514 return ptr_type_node;
7516 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7517 type_decl = build_decl (BUILTINS_LOCATION,
7518 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7520 f_next_o = build_decl (BUILTINS_LOCATION,
7521 FIELD_DECL, get_identifier ("__va_next_o"),
7522 ptr_type_node);
7523 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7524 FIELD_DECL,
7525 get_identifier ("__va_next_o_limit"),
7526 ptr_type_node);
7527 f_next_fp = build_decl (BUILTINS_LOCATION,
7528 FIELD_DECL, get_identifier ("__va_next_fp"),
7529 ptr_type_node);
7530 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7531 FIELD_DECL,
7532 get_identifier ("__va_next_fp_limit"),
7533 ptr_type_node);
7534 f_next_stack = build_decl (BUILTINS_LOCATION,
7535 FIELD_DECL, get_identifier ("__va_next_stack"),
7536 ptr_type_node);
7538 DECL_FIELD_CONTEXT (f_next_o) = record;
7539 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7540 DECL_FIELD_CONTEXT (f_next_fp) = record;
7541 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7542 DECL_FIELD_CONTEXT (f_next_stack) = record;
7544 TYPE_STUB_DECL (record) = type_decl;
7545 TYPE_NAME (record) = type_decl;
7546 TYPE_FIELDS (record) = f_next_o;
7547 DECL_CHAIN (f_next_o) = f_next_o_limit;
7548 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7549 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7550 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7552 layout_type (record);
7554 return record;
7557 /* Implement `va_start' for varargs and stdarg. */
7558 static void
7559 sh_va_start (tree valist, rtx nextarg)
7561 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7562 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7563 tree t, u;
7564 int nfp, nint;
7566 if ((! TARGET_SH2E && ! TARGET_SH4)
7567 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7569 std_expand_builtin_va_start (valist, nextarg);
7570 return;
7573 f_next_o = TYPE_FIELDS (va_list_type_node);
7574 f_next_o_limit = DECL_CHAIN (f_next_o);
7575 f_next_fp = DECL_CHAIN (f_next_o_limit);
7576 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7577 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7579 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7580 NULL_TREE);
7581 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7582 valist, f_next_o_limit, NULL_TREE);
7583 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7584 NULL_TREE);
7585 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7586 valist, f_next_fp_limit, NULL_TREE);
7587 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7588 valist, f_next_stack, NULL_TREE);
7590 /* Call __builtin_saveregs. */
7591 u = make_tree (sizetype, expand_builtin_saveregs ());
7592 u = fold_convert (ptr_type_node, u);
7593 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7594 TREE_SIDE_EFFECTS (t) = 1;
7595 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7597 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7598 if (nfp < 8)
7599 nfp = 8 - nfp;
7600 else
7601 nfp = 0;
7602 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7603 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7604 TREE_SIDE_EFFECTS (t) = 1;
7605 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7607 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7608 TREE_SIDE_EFFECTS (t) = 1;
7609 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7611 nint = crtl->args.info.arg_count[SH_ARG_INT];
7612 if (nint < 4)
7613 nint = 4 - nint;
7614 else
7615 nint = 0;
7616 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7617 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7618 TREE_SIDE_EFFECTS (t) = 1;
7619 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7621 u = make_tree (ptr_type_node, nextarg);
7622 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7623 TREE_SIDE_EFFECTS (t) = 1;
7624 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7627 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7628 member, return it. */
7629 static tree
7630 find_sole_member (tree type)
7632 tree field, member = NULL_TREE;
7634 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7636 if (TREE_CODE (field) != FIELD_DECL)
7637 continue;
7638 if (!DECL_SIZE (field))
7639 return NULL_TREE;
7640 if (integer_zerop (DECL_SIZE (field)))
7641 continue;
7642 if (member)
7643 return NULL_TREE;
7644 member = field;
7646 return member;
7649 /* Implement `va_arg'. */
7650 static tree
7651 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7652 gimple_seq *post_p ATTRIBUTE_UNUSED)
7654 tree tmp;
7655 tree addr, lab_over = NULL, result = NULL;
7656 tree eff_type;
7658 const bool pass_by_ref =
7659 !VOID_TYPE_P (type)
7660 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7662 if (pass_by_ref)
7663 type = build_pointer_type (type);
7665 HOST_WIDE_INT size = int_size_in_bytes (type);
7666 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7667 tree pptr_type_node = build_pointer_type (ptr_type_node);
7669 if ((TARGET_SH2E || TARGET_SH4)
7670 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7672 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7673 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7674 tree lab_false;
7675 tree member;
7677 f_next_o = TYPE_FIELDS (va_list_type_node);
7678 f_next_o_limit = DECL_CHAIN (f_next_o);
7679 f_next_fp = DECL_CHAIN (f_next_o_limit);
7680 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7681 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7683 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7684 NULL_TREE);
7685 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7686 valist, f_next_o_limit, NULL_TREE);
7687 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7688 valist, f_next_fp, NULL_TREE);
7689 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7690 valist, f_next_fp_limit, NULL_TREE);
7691 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7692 valist, f_next_stack, NULL_TREE);
7694 /* Structures with a single member with a distinct mode are passed
7695 like their member. This is relevant if the latter has a REAL_TYPE
7696 or COMPLEX_TYPE type. */
7697 eff_type = type;
7698 while (TREE_CODE (eff_type) == RECORD_TYPE
7699 && (member = find_sole_member (eff_type))
7700 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7701 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7702 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7704 tree field_type = TREE_TYPE (member);
7706 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7707 eff_type = field_type;
7708 else
7710 gcc_assert ((TYPE_ALIGN (eff_type)
7711 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7712 || (TYPE_ALIGN (eff_type)
7713 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7714 break;
7718 bool pass_as_float;
7719 if (TARGET_FPU_DOUBLE)
7721 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7722 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7723 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7724 && size <= 16));
7726 else
7728 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7731 addr = create_tmp_var (pptr_type_node);
7732 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7733 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7735 valist = build_simple_mem_ref (addr);
7737 if (pass_as_float)
7739 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7740 tree cmp;
7741 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7743 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7744 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7746 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7747 tmp = next_fp_limit;
7748 if (size > 4 && !is_double)
7749 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7750 tmp = build2 (GE_EXPR, boolean_type_node,
7751 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7752 cmp = build3 (COND_EXPR, void_type_node, tmp,
7753 build1 (GOTO_EXPR, void_type_node,
7754 unshare_expr (lab_false)), NULL_TREE);
7755 if (!is_double)
7756 gimplify_and_add (cmp, pre_p);
7758 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7759 || (is_double || size == 16))
7761 tmp = fold_convert (sizetype, next_fp_tmp);
7762 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7763 size_int (UNITS_PER_WORD));
7764 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7765 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7767 if (is_double)
7768 gimplify_and_add (cmp, pre_p);
7770 #ifdef FUNCTION_ARG_SCmode_WART
7771 if (TYPE_MODE (eff_type) == SCmode
7772 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7774 tree subtype = TREE_TYPE (eff_type);
7775 tree real, imag;
7777 imag
7778 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7779 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7781 real
7782 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7783 real = get_initialized_tmp_var (real, pre_p, NULL);
7785 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7786 if (type != eff_type)
7787 result = build1 (VIEW_CONVERT_EXPR, type, result);
7788 result = get_initialized_tmp_var (result, pre_p, NULL);
7790 #endif /* FUNCTION_ARG_SCmode_WART */
7792 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7793 gimplify_and_add (tmp, pre_p);
7795 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7796 gimplify_and_add (tmp, pre_p);
7798 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7799 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7800 gimplify_assign (unshare_expr (next_fp_tmp),
7801 unshare_expr (valist), pre_p);
7803 gimplify_assign (unshare_expr (valist),
7804 unshare_expr (next_fp_tmp), post_p);
7805 valist = next_fp_tmp;
7807 else
7809 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7810 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7811 unshare_expr (next_o_limit));
7812 tmp = build3 (COND_EXPR, void_type_node, tmp,
7813 build1 (GOTO_EXPR, void_type_node,
7814 unshare_expr (lab_false)),
7815 NULL_TREE);
7816 gimplify_and_add (tmp, pre_p);
7818 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7819 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7821 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7822 gimplify_and_add (tmp, pre_p);
7824 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7825 gimplify_and_add (tmp, pre_p);
7827 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7828 gimplify_assign (unshare_expr (next_o),
7829 unshare_expr (next_o_limit), pre_p);
7831 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7832 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7835 if (!result)
7837 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7838 gimplify_and_add (tmp, pre_p);
7842 /* ??? In va-sh.h, there had been code to make values larger than
7843 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7845 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7846 if (result)
7848 gimplify_assign (result, tmp, pre_p);
7849 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7850 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7851 gimplify_and_add (tmp, pre_p);
7853 else
7854 result = tmp;
7856 if (pass_by_ref)
7857 result = build_va_arg_indirect_ref (result);
7859 return result;
7862 /* 64-bit floating point memory transfers are paired single precision loads
7863 or stores. So DWARF information needs fixing in little endian (unless
7864 PR=SZ=1 in FPSCR). */
7866 sh_dwarf_register_span (rtx reg)
7868 unsigned regno = REGNO (reg);
7870 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7871 return NULL_RTX;
7873 return
7874 gen_rtx_PARALLEL (VOIDmode,
7875 gen_rtvec (2,
7876 gen_rtx_REG (SFmode, regno + 1),
7877 gen_rtx_REG (SFmode, regno)));
7880 static machine_mode
7881 sh_promote_function_mode (const_tree type, machine_mode mode,
7882 int *punsignedp, const_tree funtype,
7883 int for_return)
7885 if (sh_promote_prototypes (funtype))
7886 return promote_mode (type, mode, punsignedp);
7887 else
7888 return default_promote_function_mode (type, mode, punsignedp, funtype,
7889 for_return);
7892 static bool
7893 sh_promote_prototypes (const_tree type)
7895 if (TARGET_HITACHI)
7896 return false;
7897 if (! type)
7898 return true;
7899 return ! sh_attr_renesas_p (type);
7902 static bool
7903 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7904 const_tree type, bool named ATTRIBUTE_UNUSED)
7906 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7908 if (targetm.calls.must_pass_in_stack (mode, type))
7909 return true;
7911 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7912 wants to know about pass-by-reference semantics for incoming
7913 arguments. */
7914 if (! cum)
7915 return false;
7917 return false;
7920 static bool
7921 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7922 const_tree type, bool named ATTRIBUTE_UNUSED)
7924 /* ??? How can it possibly be correct to return true only on the
7925 caller side of the equation? Is there someplace else in the
7926 sh backend that's magically producing the copies? */
7927 return (get_cumulative_args (cum)->outgoing
7928 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7929 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7932 static sh_arg_class
7933 get_sh_arg_class (machine_mode mode)
7935 if (TARGET_FPU_ANY && mode == SFmode)
7936 return SH_ARG_FLOAT;
7938 if (TARGET_FPU_DOUBLE
7939 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7940 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7941 return SH_ARG_FLOAT;
7943 return SH_ARG_INT;
7946 /* Round a register number up to a proper boundary for an arg of mode
7947 MODE.
7948 The SH doesn't care about double alignment, so we only
7949 round doubles to even regs when explicitly asked to. */
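/* E.g. (illustrative): with a double-precision FPU, a DFmode argument that
   arrives after an odd number of FP argument registers has been used is
   started at the next even register, so that it occupies an aligned
   register pair. */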
7950 static int
7951 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7953 /* FIXME: This used to be a macro and has been copy pasted into this
7954 function as is. Make this more readable. */
7955 return
7956 (((TARGET_ALIGN_DOUBLE
7957 || (TARGET_FPU_DOUBLE
7958 && (mode == DFmode || mode == DCmode)
7959 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7960 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7961 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7962 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7963 : cum.arg_count[(int) get_sh_arg_class (mode)]);
7966 /* Return true if arg of the specified mode should be passed in a register
7967 or false otherwise. */
7968 static bool
7969 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7970 const_tree type)
7972 /* FIXME: This used to be a macro and has been copy pasted into this
7973 function as is. Make this more readable. */
7974 return
7975 ((type == 0
7976 || (! TREE_ADDRESSABLE (type)
7977 && (! (TARGET_HITACHI || cum.renesas_abi)
7978 || ! (AGGREGATE_TYPE_P (type)
7979 || (!TARGET_FPU_ANY
7980 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7981 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7982 && ! cum.force_mem
7983 && (TARGET_SH2E
7984 ? ((mode) == BLKmode
7985 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7986 + int_size_in_bytes (type))
7987 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7988 : ((sh_round_reg (cum, mode)
7989 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
7990 <= NPARM_REGS (mode)))
7991 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7994 static int
7995 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
7996 tree type, bool named ATTRIBUTE_UNUSED)
7998 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7999 int words = 0;
8001 if (sh_pass_in_reg_p (*cum, mode, type)
8002 && !TARGET_FPU_DOUBLE
8003 && (sh_round_reg (*cum, mode)
8004 + (mode != BLKmode
8005 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
8006 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
8007 > NPARM_REGS (mode)))
8008 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
8010 return words * UNITS_PER_WORD;
8014 /* Define where to put the arguments to a function.
8015 Value is zero to push the argument on the stack,
8016 or a hard register in which to store the argument.
8018 MODE is the argument's machine mode.
8019 TYPE is the data type of the argument (as a tree).
8020 This is null for libcalls where that information may
8021 not be available.
8022 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8023 the preceding args and about the function being called.
8024 NAMED is nonzero if this argument is a named parameter
8025 (otherwise it is an extra parameter matching an ellipsis).
8027 On SH the first args are normally in registers
8028 and the rest are pushed. Any arg that starts within the first
8029 NPARM_REGS words is at least partially passed in a register unless
8030 its data type forbids. */
8031 static rtx
8032 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
8033 const_tree type, bool named)
8035 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8037 if (mode == VOIDmode)
8038 return ca->renesas_abi ? const1_rtx : const0_rtx;
8040 if (sh_pass_in_reg_p (*ca, mode, type)
8041 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8043 int regno;
8045 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8046 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8048 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8049 gen_rtx_REG (SFmode,
8050 BASE_ARG_REG (mode)
8051 + (sh_round_reg (*ca, mode) ^ 1)),
8052 const0_rtx);
8053 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8054 gen_rtx_REG (SFmode,
8055 BASE_ARG_REG (mode)
8056 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8057 GEN_INT (4));
8058 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8061 /* If the alignment of a DF value causes an SF register to be
8062 skipped, we will use that skipped register for the next SF
8063 value. */
8064 if ((TARGET_HITACHI || ca->renesas_abi)
8065 && ca->free_single_fp_reg
8066 && mode == SFmode)
8067 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8069 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8070 ^ (mode == SFmode && TARGET_SH4
8071 && TARGET_LITTLE_ENDIAN
8072 && ! TARGET_HITACHI && ! ca->renesas_abi);
8073 return gen_rtx_REG (mode, regno);
8077 return NULL_RTX;
8080 /* Update the data in CUM to advance over an argument
8081 of mode MODE and data type TYPE.
8082 (TYPE is null for libcalls where that information may not be
8083 available.) */
8084 static void
8085 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
8086 const_tree type, bool named ATTRIBUTE_UNUSED)
8088 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8090 if (ca->force_mem)
8091 ca->force_mem = false;
8093 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8095 /* Note that we've used the skipped register. */
8096 if (mode == SFmode && ca->free_single_fp_reg)
8098 ca->free_single_fp_reg = 0;
8099 return;
8101 /* When we have a DF after an SF, there's an SF register that gets
8102 skipped in order to align the DF value. We note this skipped
8103 register, because the next SF value will use it, and not the
8104 SF that follows the DF. */
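/* E.g. (illustrative, Renesas/Hitachi ABI with a double-precision FPU): for
   (float a, double b, float c), A takes the first FP argument register, B is
   aligned to the next register pair, and the single register skipped by that
   alignment is remembered here so that C is passed in it. */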
8105 if (mode == DFmode
8106 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8108 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8109 + BASE_ARG_REG (mode));
8113 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8114 || sh_pass_in_reg_p (*ca, mode, type))
8115 (ca->arg_count[(int) get_sh_arg_class (mode)]
8116 = (sh_round_reg (*ca, mode)
8117 + (mode == BLKmode
8118 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8119 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
8122 /* The Renesas calling convention doesn't quite fit into this scheme since
8123 the address is passed like an invisible argument, but one that is always
8124 passed in memory. */
8125 static rtx
8126 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8128 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8129 return NULL_RTX;
8130 return gen_rtx_REG (Pmode, 2);
8133 /* Worker function for TARGET_FUNCTION_VALUE.
8135 For the SH, this is like LIBCALL_VALUE, except that we must change the
8136 mode like PROMOTE_MODE does.
8137 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8138 tested here has to be kept in sync with the one in
8139 explow.c:promote_mode. */
8140 static rtx
8141 sh_function_value (const_tree valtype,
8142 const_tree fn_decl_or_type,
8143 bool outgoing ATTRIBUTE_UNUSED)
8145 if (fn_decl_or_type
8146 && !DECL_P (fn_decl_or_type))
8147 fn_decl_or_type = NULL;
8149 return gen_rtx_REG (
8150 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8151 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8152 && (TREE_CODE (valtype) == INTEGER_TYPE
8153 || TREE_CODE (valtype) == ENUMERAL_TYPE
8154 || TREE_CODE (valtype) == BOOLEAN_TYPE
8155 || TREE_CODE (valtype) == REAL_TYPE
8156 || TREE_CODE (valtype) == OFFSET_TYPE))
8157 && sh_promote_prototypes (fn_decl_or_type)
8158 ? SImode : TYPE_MODE (valtype)),
8159 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8162 /* Worker function for TARGET_LIBCALL_VALUE. */
8163 static rtx
8164 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8166 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8169 /* Return true if N is a possible register number of function value. */
8170 static bool
8171 sh_function_value_regno_p (const unsigned int regno)
8173 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8176 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8177 static bool
8178 sh_return_in_memory (const_tree type, const_tree fndecl)
8180 return TYPE_MODE (type) == BLKmode
8181 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8182 && TREE_CODE (type) == RECORD_TYPE);
8185 /* We actually emit the code in sh_expand_prologue. We used to use
8186 a static variable to flag that we need to emit this code, but that
8187 doesn't work when inlining, when functions are deferred and then emitted
8188 later. Fortunately, we already have two flags that are part of struct
8189 function that tell if a function uses varargs or stdarg. */
8190 static void
8191 sh_setup_incoming_varargs (cumulative_args_t ca,
8192 machine_mode mode,
8193 tree type,
8194 int *pretend_arg_size,
8195 int second_time ATTRIBUTE_UNUSED)
8197 gcc_assert (cfun->stdarg);
8198 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8200 int named_parm_regs, anon_parm_regs;
8202 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
8203 + (mode == BLKmode
8204 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8205 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
8206 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8207 if (anon_parm_regs > 0)
8208 *pretend_arg_size = anon_parm_regs * 4;
8212 static bool
8213 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8215 return false;
8218 static bool
8219 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8221 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8223 return ! (TARGET_HITACHI || ca->renesas_abi);
8227 /* Define the offset between two registers, one to be eliminated, and
8228 the other its replacement, at the start of a routine. */
8230 initial_elimination_offset (int from, int to)
8232 const int regs_saved_rounding = 0;
8233 int save_flags = target_flags;
8234 HARD_REG_SET live_regs_mask;
8236 int regs_saved = calc_live_regs (&live_regs_mask);
8238 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8239 target_flags = save_flags;
8241 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8243 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8244 return total_saved_regs_space + total_auto_space;
8246 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8247 return total_saved_regs_space + total_auto_space;
8249 /* Initial gap between fp and sp is 0. */
8250 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8251 return 0;
8253 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8254 return rounded_frame_size (0);
8256 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8257 return rounded_frame_size (0);
8259 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8260 && (to == HARD_FRAME_POINTER_REGNUM
8261 || to == STACK_POINTER_REGNUM));
8262 return total_auto_space;
8265 /* Parse the -mfixed-range= option string. */
8266 void
8267 sh_fix_range (const char *const_str)
8269 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8270 REG2 are either register names or register numbers. The effect
8271 of this option is to mark the registers in the range from REG1 to
8272 REG2 as ``fixed'' so they won't be used by the compiler. */
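/* For example (illustrative values), -mfixed-range=r10-r13 marks r10
   through r13 as fixed, and -mfixed-range=r10-r11,r14-r14 marks r10, r11
   and r14. */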
8274 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8276 while (1)
8278 char* dash = strchr (str, '-');
8279 if (!dash)
8281 warning (0, "value of -mfixed-range must have form REG1-REG2");
8282 return;
8284 *dash = '\0';
8285 char* comma = strchr (dash + 1, ',');
8286 if (comma)
8287 *comma = '\0';
8289 int first = decode_reg_name (str);
8290 if (first < 0)
8292 warning (0, "unknown register name: %s", str);
8293 return;
8296 int last = decode_reg_name (dash + 1);
8297 if (last < 0)
8299 warning (0, "unknown register name: %s", dash + 1);
8300 return;
8303 *dash = '-';
8305 if (first > last)
8307 warning (0, "%s-%s is an empty range", str, dash + 1);
8308 return;
8311 for (int i = first; i <= last; ++i)
8312 fixed_regs[i] = call_used_regs[i] = 1;
8314 if (!comma)
8315 break;
8317 *comma = ',';
8318 str = comma + 1;
8322 /* Insert any deferred function attributes from earlier pragmas. */
8323 static void
8324 sh_insert_attributes (tree node, tree *attributes)
8326 if (TREE_CODE (node) != FUNCTION_DECL)
8327 return;
8329 /* We are only interested in fields. */
8330 if (!DECL_P (node))
8331 return;
8333 /* Append the attributes to the deferred attributes. */
8334 *sh_deferred_function_attributes_tail = *attributes;
8335 tree attrs = sh_deferred_function_attributes;
8336 if (!attrs)
8337 return;
8339 /* Some attributes imply or require the interrupt attribute. */
8340 if (!lookup_attribute ("interrupt_handler", attrs)
8341 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8343 /* If we have a trapa_handler, but no interrupt_handler attribute,
8344 insert an interrupt_handler attribute. */
8345 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8346 /* We can't use sh_pr_interrupt here because that's not in the
8347 java frontend. */
8348 attrs
8349 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8350 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8351 if the interrupt attribute is missing, we ignore the attribute
8352 and warn. */
8353 else if (lookup_attribute ("sp_switch", attrs)
8354 || lookup_attribute ("trap_exit", attrs)
8355 || lookup_attribute ("nosave_low_regs", attrs)
8356 || lookup_attribute ("resbank", attrs))
8358 tree *tail;
8360 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8362 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8363 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8364 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8365 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8366 warning (OPT_Wattributes,
8367 "%qE attribute only applies to interrupt functions",
8368 TREE_PURPOSE (attrs));
8369 else
8371 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8372 NULL_TREE);
8373 tail = &TREE_CHAIN (*tail);
8376 attrs = *attributes;
8380 /* Install the processed list. */
8381 *attributes = attrs;
8383 /* Clear deferred attributes. */
8384 sh_deferred_function_attributes = NULL_TREE;
8385 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8387 return;
8390 /*------------------------------------------------------------------------------
8391 Target specific attributes
8392 Supported attributes are:
8394 * interrupt_handler
8395 Specifies this function is an interrupt handler.
8397 * trapa_handler
8398 Like interrupt_handler, but don't save all registers.
8400 * sp_switch
8401 Specifies an alternate stack for an interrupt handler to run on.
8403 * trap_exit
8404 Use a trapa to exit an interrupt function instead of rte.
8406 * nosave_low_regs
8407 Don't save r0..r7 in an interrupt handler function.
8408 This is useful on SH3* and SH4*, which have a separate set of low
8409 regs for user and privileged modes.
8410 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8411 those that run with interrupts disabled and thus can't be
8412 interrupted themselves).
8414 * renesas
8415 Use Renesas calling/layout conventions (functions and structures).
8417 * resbank
8418 In case of an interrupt handler function, use a register bank to
8419 save registers R0-R14, MACH, MACL, GBR and PR.
8420 This is available only on SH2A targets.
8422 * function_vector
8423 Declares a function to be called using the TBR relative addressing
8424 mode. Takes an argument that specifies the slot number in the table
8425 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
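   An illustrative use of these attributes (example declarations, not part
   of this file; the argument values are made up):

     void my_isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (11)));

     void my_banked_isr (void)
       __attribute__ ((interrupt_handler, resbank));      -- SH2A only

     void my_tbr_func (void)
       __attribute__ ((function_vector (42)));            -- SH2A TBR call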
8428 /* Handle a 'resbank' attribute. */
8429 static tree
8430 sh_handle_resbank_handler_attribute (tree * node, tree name,
8431 tree args ATTRIBUTE_UNUSED,
8432 int flags ATTRIBUTE_UNUSED,
8433 bool * no_add_attrs)
8435 if (!TARGET_SH2A)
8437 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8438 name);
8439 *no_add_attrs = true;
8441 if (TREE_CODE (*node) != FUNCTION_DECL)
8443 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8444 name);
8445 *no_add_attrs = true;
8448 return NULL_TREE;
8451 /* Handle an "interrupt_handler" attribute; arguments as in
8452 struct attribute_spec.handler. */
8453 static tree
8454 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8455 tree args ATTRIBUTE_UNUSED,
8456 int flags ATTRIBUTE_UNUSED,
8457 bool *no_add_attrs)
8459 if (TREE_CODE (*node) != FUNCTION_DECL)
8461 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8462 name);
8463 *no_add_attrs = true;
8466 return NULL_TREE;
8469 /* Handle an 'function_vector' attribute; arguments as in
8470 struct attribute_spec.handler. */
8471 static tree
8472 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8473 tree args ATTRIBUTE_UNUSED,
8474 int flags ATTRIBUTE_UNUSED,
8475 bool * no_add_attrs)
8477 if (!TARGET_SH2A)
8479 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8480 name);
8481 *no_add_attrs = true;
8483 else if (TREE_CODE (*node) != FUNCTION_DECL)
8485 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8486 name);
8487 *no_add_attrs = true;
8489 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8491 /* The argument must be a constant integer. */
8492 warning (OPT_Wattributes,
8493 "%qE attribute argument not an integer constant",
8494 name);
8495 *no_add_attrs = true;
8497 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8499 /* The argument value must be between 0 and 255. */
8500 warning (OPT_Wattributes,
8501 "%qE attribute argument should be between 0 and 255",
8502 name);
8503 *no_add_attrs = true;
8505 return NULL_TREE;
8508 /* Returns true if the given rtx X is a SYMBOL_REF for a function that has
8509 been assigned the attribute 'function_vector'. */
8510 bool
8511 sh2a_is_function_vector_call (rtx x)
8513 if (GET_CODE (x) == SYMBOL_REF
8514 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8516 tree tr = SYMBOL_REF_DECL (x);
8518 if (sh2a_function_vector_p (tr))
8519 return true;
8522 return false;
8525 /* Returns the function vector number, if the attribute
8526 'function_vector' is assigned, otherwise returns zero. */
8528 sh2a_get_function_vector_number (rtx x)
8530 if ((GET_CODE (x) == SYMBOL_REF)
8531 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8533 tree t = SYMBOL_REF_DECL (x);
8535 if (TREE_CODE (t) != FUNCTION_DECL)
8536 return 0;
8538 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8539 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8540 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8542 return 0;
8544 else
8545 return 0;
8548 /* Handle an "sp_switch" attribute; arguments as in
8549 struct attribute_spec.handler. */
8550 static tree
8551 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8552 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8554 if (TREE_CODE (*node) != FUNCTION_DECL)
8556 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8557 name);
8558 *no_add_attrs = true;
8560 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8562 /* The argument must be a constant string. */
8563 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8564 name);
8565 *no_add_attrs = true;
8568 return NULL_TREE;
8571 /* Handle an "trap_exit" attribute; arguments as in
8572 struct attribute_spec.handler. */
8573 static tree
8574 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8575 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8577 if (TREE_CODE (*node) != FUNCTION_DECL)
8579 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8580 name);
8581 *no_add_attrs = true;
8583 /* The argument specifies a trap number to be used in a trapa instruction
8584 at function exit (instead of an rte instruction). */
8585 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8587 /* The argument must be a constant integer. */
8588 warning (OPT_Wattributes, "%qE attribute argument not an "
8589 "integer constant", name);
8590 *no_add_attrs = true;
8593 return NULL_TREE;
8596 static tree
8597 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8598 tree name ATTRIBUTE_UNUSED,
8599 tree args ATTRIBUTE_UNUSED,
8600 int flags ATTRIBUTE_UNUSED,
8601 bool *no_add_attrs ATTRIBUTE_UNUSED)
8603 return NULL_TREE;
8606 /* True if __attribute__((renesas)) or -mrenesas. */
8607 bool
8608 sh_attr_renesas_p (const_tree td)
8610 if (TARGET_HITACHI)
8611 return true;
8612 if (td == NULL_TREE)
8613 return false;
8614 if (DECL_P (td))
8615 td = TREE_TYPE (td);
8616 if (td == error_mark_node)
8617 return false;
8618 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8621 /* True if __attribute__((renesas)) or -mrenesas, for the current
8622 function. */
8623 bool
8624 sh_cfun_attr_renesas_p (void)
8626 return sh_attr_renesas_p (current_function_decl);
8629 /* Returns true if the current function has the "interrupt_handler"
8630 attribute set. */
8631 bool
8632 sh_cfun_interrupt_handler_p (void)
8634 return (lookup_attribute ("interrupt_handler",
8635 DECL_ATTRIBUTES (current_function_decl))
8636 != NULL_TREE);
8639 /* Returns true if FUNC has been assigned the attribute
8640 "function_vector". */
8641 bool
8642 sh2a_function_vector_p (tree func)
8644 if (TREE_CODE (func) != FUNCTION_DECL)
8645 return false;
8647 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8648 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8649 return true;
8651 return false;
8654 /* Returns true if the current function is an SH2A interrupt handler with the "resbank" attribute set. */
8655 bool
8656 sh_cfun_resbank_handler_p (void)
8658 return ((lookup_attribute ("resbank",
8659 DECL_ATTRIBUTES (current_function_decl))
8660 != NULL_TREE)
8661 && (lookup_attribute ("interrupt_handler",
8662 DECL_ATTRIBUTES (current_function_decl))
8663 != NULL_TREE) && TARGET_SH2A);
8666 /* Returns true if the current function has a "trap_exit" attribute set. */
8667 bool
8668 sh_cfun_trap_exit_p (void)
8670 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8671 != NULL_TREE;
8674 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8675 static const char *
8676 sh_check_pch_target_flags (int old_flags)
8678 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8679 | MASK_SH_E | MASK_HARD_SH4
8680 | MASK_FPU_SINGLE | MASK_SH4))
8681 return _("created and used with different architectures / ABIs");
8682 if ((old_flags ^ target_flags) & MASK_HITACHI)
8683 return _("created and used with different ABIs");
8684 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8685 return _("created and used with different endianness");
8686 return NULL;
8689 /* Predicates used by the templates. */
8691 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8692 Used only in general_movsrc_operand. */
8693 bool
8694 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8696 switch (REGNO (op))
8698 case PR_REG:
8699 case MACL_REG:
8700 case MACH_REG:
8701 return true;
8703 return false;
8706 /* Returns true if OP is a floating point value with value 0.0. */
8707 bool
8708 fp_zero_operand (rtx op)
8710 if (GET_MODE (op) != SFmode)
8711 return false;
8713 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8714 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8717 /* Returns true if OP is a floating point value with value 1.0. */
8718 bool
8719 fp_one_operand (rtx op)
8721 if (GET_MODE (op) != SFmode)
8722 return false;
8724 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8727 /* Return the TLS type for TLS symbols. */
8728 enum tls_model
8729 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8731 if (GET_CODE (op) != SYMBOL_REF)
8732 return TLS_MODEL_NONE;
8733 return SYMBOL_REF_TLS_MODEL (op);
8736 /* Return the destination address of a branch. */
8737 static int
8738 branch_dest (rtx branch)
8740 rtx dest = SET_SRC (PATTERN (branch));
8742 if (GET_CODE (dest) == IF_THEN_ELSE)
8743 dest = XEXP (dest, 1);
8745 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8748 /* Return nonzero if REG is not used after INSN.
8749 We assume REG is a reload reg, and therefore does
8750 not live past labels. It may live past calls or jumps though. */
8751 bool
8752 reg_unused_after (rtx reg, rtx_insn *insn)
8754 /* If the reg is set by this instruction, then it is safe for our
8755 case. Disregard the case where this is a store to memory, since
8756 we are checking a register used in the store address. */
8757 rtx set = single_set (insn);
8758 if (set && !MEM_P (SET_DEST (set))
8759 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8760 return true;
8762 while ((insn = NEXT_INSN (insn)))
8764 if (!INSN_P (insn))
8765 continue;
8767 rtx_code code = GET_CODE (insn);
8769 #if 0
8770 /* If this is a label that existed before reload, then the register
8771 is dead here. However, if this is a label added by reorg, then
8772 the register may still be live here. We can't tell the difference,
8773 so we just ignore labels completely. */
8774 if (code == CODE_LABEL)
8775 return 1;
8776 /* else */
8777 #endif
8779 if (code == JUMP_INSN)
8780 return false;
8782 /* If this is a sequence, we must handle them all at once.
8783 We could have for instance a call that sets the target register,
8784 and an insn in a delay slot that uses the register. In this case,
8785 we must return 0. */
8786 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8788 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8789 bool retval = false;
8791 for (int i = 0; i < seq->len (); i++)
8793 rtx_insn *this_insn = seq->insn (i);
8794 rtx set = single_set (this_insn);
8796 if (CALL_P (this_insn))
8797 code = CALL_INSN;
8798 else if (JUMP_P (this_insn))
8800 if (INSN_ANNULLED_BRANCH_P (this_insn))
8801 return false;
8802 code = JUMP_INSN;
8805 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8806 return false;
8807 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8809 if (!MEM_P (SET_DEST (set)))
8810 retval = true;
8811 else
8812 return false;
8814 if (set == NULL_RTX
8815 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8816 return false;
8818 if (retval)
8819 return true;
8820 else if (code == JUMP_INSN)
8821 return false;
8824 rtx set = single_set (insn);
8825 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8826 return false;
8827 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8828 return !MEM_P (SET_DEST (set));
8829 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8830 return false;
8832 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8833 return true;
8835 return true;
8839 static GTY(()) rtx t_reg_rtx;
8841 get_t_reg_rtx (void)
8843 if (! t_reg_rtx)
8844 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8845 return t_reg_rtx;
8848 static GTY(()) tree fpscr_values;
8850 static void
8851 emit_fpu_switch (rtx scratch, int index)
8853 if (fpscr_values == NULL)
8855 tree t = build_index_type (integer_one_node);
8856 t = build_array_type (integer_type_node, t);
8857 t = build_decl (BUILTINS_LOCATION,
8858 VAR_DECL, get_identifier ("__fpscr_values"), t);
8859 DECL_ARTIFICIAL (t) = 1;
8860 DECL_IGNORED_P (t) = 1;
8861 DECL_EXTERNAL (t) = 1;
8862 TREE_STATIC (t) = 1;
8863 TREE_PUBLIC (t) = 1;
8864 TREE_USED (t) = 1;
8866 fpscr_values = t;
8869 rtx src = DECL_RTL (fpscr_values);
8870 if (!can_create_pseudo_p ())
8872 emit_move_insn (scratch, XEXP (src, 0));
8873 if (index != 0)
8874 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8875 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8877 else
8878 src = adjust_address (src, SImode, index * 4);
8880 emit_insn (gen_lds_fpscr (src));
8883 static rtx get_free_reg (HARD_REG_SET);
8885 /* This function returns a register to use for loading the address to load
8886 the fpscr from. Currently it always returns r1 or r7, but when we are
8887 able to use pseudo registers after combine, or have a better mechanism
8888 for choosing a register, it should be done here. */
8889 /* REGS_LIVE is the liveness information for the point for which we
8890 need this allocation. In some bare-bones exit blocks, r1 is live at the
8891 start. We can even have all of r0..r3 being live:
8892 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8893 The INSN before which the new insns are placed will clobber the register
8894 we return. If a basic block consists only of setting the return value
8895 register to a pseudo and using that register, the return value is not
8896 live before or after this block, yet we'll insert our insns right in
8897 the middle. */
8898 static rtx
8899 get_free_reg (HARD_REG_SET regs_live)
8901 if (! TEST_HARD_REG_BIT (regs_live, 1))
8902 return gen_rtx_REG (Pmode, 1);
8904 /* Hard reg 1 is live; since this is a small register classes target,
8905 there shouldn't be anything but a jump before the function end. */
8906 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8907 return gen_rtx_REG (Pmode, 7);
8910 /* This function will set the fpscr from memory.
8911 MODE is the mode we are setting it to. */
8912 void
8913 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8915 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8916 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8918 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8919 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8922 /* Is the given character a logical line separator for the assembler? */
8923 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8924 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8925 #endif
8927 static bool
8928 sequence_insn_p (rtx_insn *insn)
8930 rtx_insn* prev = PREV_INSN (insn);
8931 if (prev == NULL)
8932 return false;
8934 rtx_insn* next = NEXT_INSN (prev);
8935 if (next == NULL)
8936 return false;
8938 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8942 sh_insn_length_adjustment (rtx_insn *insn)
8944 /* Instructions with unfilled delay slots take up an extra two bytes for
8945 the nop in the delay slot. */
8946 if (((NONJUMP_INSN_P (insn)
8947 && GET_CODE (PATTERN (insn)) != USE
8948 && GET_CODE (PATTERN (insn)) != CLOBBER)
8949 || CALL_P (insn) || JUMP_P (insn))
8950 && ! sequence_insn_p (insn)
8951 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8952 return 2;
8954 /* Increase the insn length of a cbranch without a delay slot insn to
8955 force a delay slot which will be stuffed with a nop. */
8956 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8957 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8958 && ! sequence_insn_p (insn))
8959 return 2;
8961 /* sh-dsp parallel processing insn take four bytes instead of two. */
8963 if (NONJUMP_INSN_P (insn))
8965 int sum = 0;
8966 rtx body = PATTERN (insn);
8967 const char *templ;
8968 char c;
8969 bool maybe_label = true;
8971 if (GET_CODE (body) == ASM_INPUT)
8972 templ = XSTR (body, 0);
8973 else if (asm_noperands (body) >= 0)
8974 templ
8975 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8976 else
8977 return 0;
8980 int ppi_adjust = 0;
8983 c = *templ++;
8984 while (c == ' ' || c == '\t');
8985 /* all sh-dsp parallel-processing insns start with p.
8986 The only non-ppi sh insn starting with p is pref.
8987 The only ppi starting with pr is prnd. */
8988 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8989 ppi_adjust = 2;
8990 /* The repeat pseudo-insn expands to three insns, a total of
8991 six bytes in size. */
8992 else if ((c == 'r' || c == 'R')
8993 && ! strncasecmp ("epeat", templ, 5))
8994 ppi_adjust = 4;
8995 while (c && c != '\n'
8996 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8998 /* If this is a label, it is obviously not a ppi insn. */
8999 if (c == ':' && maybe_label)
9001 ppi_adjust = 0;
9002 break;
9004 else if (c == '\'' || c == '"')
9005 maybe_label = false;
9006 c = *templ++;
9008 sum += ppi_adjust;
9009 maybe_label = c != ':';
9011 while (c);
9012 return sum;
9014 return 0;
9017 /* Return TRUE for a valid displacement for the REG+disp addressing
9018 with MODE. */
9019 bool
9020 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
9021 bool allow_zero)
9023 if (! CONST_INT_P (op))
9024 return false;
9027 const HOST_WIDE_INT offset = INTVAL (op);
9028 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9029 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9031 /* If the mode does not support any displacement always return false.
9032 Even though an index of '0' is actually always valid, it will cause
9033 troubles when e.g. a DFmode move is split into two SFmode moves,
9034 where one SFmode move will have index '0' and the other move will
9035 have index '4'. */
9036 if (!allow_zero && max_disp < 1)
9037 return false;
9039 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9043 /* Recognize an RTL expression that is a valid memory address for
9044 an instruction.
9045 The MODE argument is the machine mode for the MEM expression
9046 that wants to use this address.
9047 Allow REG
9048 REG+disp
9049 REG+r0
9050 REG++
9051 --REG
9053 GBR+disp */
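/* For example (illustrative, SImode): this accepts @r4, @(8,r4), @(r0,r4),
   @r4+ and @-r4, as well as @(disp,gbr); the allowed displacement range
   depends on the access size, see sh_legitimate_index_p above. */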
9054 static bool
9055 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9057 if (REG_P (x) && REGNO (x) == GBR_REG)
9058 return true;
9060 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9061 return true;
9062 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9063 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9064 return true;
9065 else if (GET_CODE (x) == PLUS)
9067 rtx xop0 = XEXP (x, 0);
9068 rtx xop1 = XEXP (x, 1);
9070 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9071 return gbr_displacement (xop1, mode);
9073 if (GET_MODE_SIZE (mode) <= 8
9074 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9075 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9076 return true;
9078 if (GET_MODE_SIZE (mode) <= 4
9079 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9081 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9082 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9083 return true;
9084 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9085 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9086 return true;
9090 return false;
9093 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9094 isn't protected by a PIC unspec. */
9095 bool
9096 nonpic_symbol_mentioned_p (rtx x)
9098 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9099 || GET_CODE (x) == PC)
9100 return true;
9102 /* We don't want to look into the possible MEM location of a
9103 CONST_DOUBLE, since we're not going to use it, in general. */
9104 if (GET_CODE (x) == CONST_DOUBLE)
9105 return false;
9107 if (GET_CODE (x) == UNSPEC
9108 && (XINT (x, 1) == UNSPEC_PIC
9109 || XINT (x, 1) == UNSPEC_GOT
9110 || XINT (x, 1) == UNSPEC_GOTOFF
9111 || XINT (x, 1) == UNSPEC_GOTPLT
9112 || XINT (x, 1) == UNSPEC_GOTTPOFF
9113 || XINT (x, 1) == UNSPEC_DTPOFF
9114 || XINT (x, 1) == UNSPEC_TPOFF
9115 || XINT (x, 1) == UNSPEC_PLT
9116 || XINT (x, 1) == UNSPEC_PCREL
9117 || XINT (x, 1) == UNSPEC_SYMOFF
9118 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9119 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9120 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9121 return false;
9123 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9124 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9126 if (fmt[i] == 'E')
9128 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9129 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9130 return true;
9132 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9133 return true;
9136 return false;
9139 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9140 @GOTOFF in `reg'. */
9142 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9144 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9145 return orig;
9147 if (GET_CODE (orig) == LABEL_REF
9148 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9150 if (reg == NULL_RTX)
9151 reg = gen_reg_rtx (Pmode);
9153 if (TARGET_FDPIC
9154 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9156 /* Weak functions may be NULL which doesn't work with
9157 GOTOFFFUNCDESC because the runtime offset is not known. */
9158 if (SYMBOL_REF_WEAK (orig))
9159 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9160 else
9161 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9163 else if (TARGET_FDPIC
9164 && (GET_CODE (orig) == LABEL_REF
9165 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9166 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9167 || SYMBOL_REF_EXTERNAL_P (orig)
9168 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
9169 /* In FDPIC, GOTOFF can only be used for writable data. */
9170 emit_insn (gen_symGOT2reg (reg, orig));
9171 else
9172 emit_insn (gen_symGOTOFF2reg (reg, orig));
9173 return reg;
9175 else if (GET_CODE (orig) == SYMBOL_REF)
9177 if (reg == NULL_RTX)
9178 reg = gen_reg_rtx (Pmode);
9180 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9181 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9182 else
9183 emit_insn (gen_symGOT2reg (reg, orig));
9184 return reg;
9186 return orig;
9189 /* Given a (logical) mode size and an offset in bytes, try to find the
9190 appropriate displacement value for a mov insn. On SH the displacements
9191 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9192 15 bytes in QImode. To compensate for this we create a new base address by
9193 adding an adjustment value to it.
9195 If the originally requested offset is greater than 127 we prefer using
9196 values 124..127 over 128..131 to increase opportunities to use the
9197 add #imm, Rn insn.
9199 In some cases it is possible that a requested offset might seem unaligned
9200 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9201 This is compensated by adjusting the base address so that the effective
9202 address of the displacement move insn will be aligned.
9204 This is not the best possible way of rebasing the base address, as it
9205 does not look at other present displacement addressings around it.
9206 In some cases this can create more base address adjustments than would
9207 actually be necessary. */
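/* Worked example (illustrative): for an SImode access at offset 128 the
   maximum mov displacement is 60, so sh_find_mov_disp_adjust below yields
   offset_adjust == 124 and mov_disp == 4. The 124 still fits the signed
   8-bit immediate of add #imm,Rn, which is why 124..127 is preferred over
   128..131. */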
9208 struct disp_adjust
9210 rtx offset_adjust;
9211 rtx mov_disp;
9214 static struct disp_adjust
9215 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9217 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9219 /* Do not try to use SH2A's large displacements here, because this would
9220 effectively disable the small displacement insns. */
9221 const int mode_sz = GET_MODE_SIZE (mode);
9222 const int mov_insn_sz = mov_insn_size (mode, false);
9223 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9224 const int max_disp_next = max_disp + mov_insn_sz;
9225 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9226 HOST_WIDE_INT offset_adjust;
9228 /* In some cases this actually does happen and we must check for it. */
9229 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9230 return res;
9232 /* Keeps the previous behavior for QImode displacement addressing.
9233 This just decides how the offset is re-based. Removing this special
9234 case will result in slightly bigger code on average, but it's not that
9235 bad actually. */
9236 if (mov_insn_sz == 1)
9237 align_modifier = 0;
9239 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9241 if (mode_sz + offset - offset_adjust <= max_disp_next)
9243 res.offset_adjust = GEN_INT (offset_adjust);
9244 res.mov_disp = GEN_INT (offset - offset_adjust);
9247 return res;
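/* Worked example (illustrative only): for SImode, mov_insn_size is 4 and
   the maximum displacement is 60, so a request for offset = 128 gives
   align_modifier = 4 and
       offset_adjust = ((128 + 4) & ~60) - 4 = 128 - 4 = 124
       mov_disp      = 128 - 124 = 4
   i.e. the base is bumped by 124 (preferring 124..127 over 128..131 as
   described above) and the remaining displacement of 4 fits the insn.  */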
9250 /* Try to modify an illegitimate address and make it legitimate.
9251 If we find one, return the new, valid address.
9252 Otherwise, return the original address. */
9253 static rtx
9254 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9256 if (flag_pic)
9257 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9259 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9260 || (TARGET_SH2E && mode == SFmode))
9261 return x;
9263 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9264 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9266 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9267 INTVAL (XEXP (x, 1)));
9269 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9271 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9272 adj.offset_adjust, NULL_RTX, 0,
9273 OPTAB_LIB_WIDEN);
9274 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9277 return x;
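/* Sketch of the effect (assumed typical case): an address like
   (plus:SI (reg Rn) (const_int 128)) comes back as
   (plus:SI (reg tmp) (const_int 4)), where expand_binop has emitted
   tmp := Rn + 124 beforehand, so the remaining displacement of 4 fits
   the displacement mov insn.  */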
9280 /* Attempt to replace *p, which is an address that needs reloading, with
9281 a valid memory address for an operand of mode MODE.
9282 Like for sh_legitimize_address, for the SH we try to get a normal form
9283 of the address. That will allow inheritance of the address reloads. */
9284 bool
9285 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9286 int itype)
9288 enum reload_type type = (enum reload_type) itype;
9289 const int mode_sz = GET_MODE_SIZE (mode);
9291 if (sh_lra_p ())
9292 return false;
9294 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9295 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9297 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9298 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9300 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9302 push_reload (*p, NULL_RTX, p, NULL,
9303 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9304 return true;
9307 if (TARGET_SH2E && mode == SFmode)
9309 *p = copy_rtx (*p);
9310 push_reload (*p, NULL_RTX, p, NULL,
9311 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9312 return true;
9315 /* FIXME: Do not allow legitimizing QImode and HImode displacement
9316 moves, because reload then has a problem figuring out the constraint
9317 that the move insn's target/source reg must be R0.
9318 Or maybe some handling is wrong in sh_secondary_reload for this
9319 to work properly? */
9320 if ((mode_sz == 4 || mode_sz == 8)
9321 && ! (TARGET_SH4 && mode == DFmode)
9322 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9324 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9325 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9326 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9327 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9328 return true;
9332 /* We must re-recognize what we created before. */
9333 if (GET_CODE (*p) == PLUS
9334 && (mode_sz == 4 || mode_sz == 8)
9335 && GET_CODE (XEXP (*p, 0)) == PLUS
9336 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9337 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9338 && CONST_INT_P (XEXP (*p, 1))
9339 && ! (TARGET_SH2E && mode == SFmode))
9341 /* Because this address is so complex, we know it must have
9342 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9343 it is already unshared, and needs no further unsharing. */
9344 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9345 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9346 return true;
9349 return false;
9352 /* In the name of slightly smaller debug output, and to cater to
9353 general assembler lossage, recognize various UNSPEC sequences
9354 and turn them back into a direct symbol reference. */
9355 static rtx
9356 sh_delegitimize_address (rtx orig_x)
9358 orig_x = delegitimize_mem_from_attrs (orig_x);
9360 rtx x = orig_x;
9361 if (MEM_P (x))
9362 x = XEXP (x, 0);
9363 if (GET_CODE (x) == CONST)
9365 rtx y = XEXP (x, 0);
9366 if (GET_CODE (y) == UNSPEC)
9368 if (XINT (y, 1) == UNSPEC_GOT
9369 || XINT (y, 1) == UNSPEC_GOTOFF
9370 || XINT (y, 1) == UNSPEC_SYMOFF)
9371 return XVECEXP (y, 0, 0);
9372 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9374 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9376 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9378 if (GET_CODE (symplt) == UNSPEC
9379 && (XINT (symplt, 1) == UNSPEC_PLT
9380 || XINT (symplt, 1) == UNSPEC_PCREL))
9381 return XVECEXP (symplt, 0, 0);
9387 return orig_x;
9390 /* Mark the use of a constant in the literal table. If the constant
9391 has multiple labels, make it unique. */
9392 static rtx
9393 mark_constant_pool_use (rtx x)
9395 if (x == NULL_RTX)
9396 return x;
9398 switch (GET_CODE (x))
9400 case LABEL_REF:
9401 x = XEXP (x, 0);
9402 case CODE_LABEL:
9403 break;
9404 default:
9405 return x;
9408 /* Get the first label in the list of labels for the same constant
9409 and delete the other labels in the list. */
9410 rtx_insn* lab = as_a <rtx_insn*> (x);
9411 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9413 if (!LABEL_P (insn)
9414 || LABEL_REFS (insn) != NEXT_INSN (insn))
9415 break;
9416 lab = insn;
9419 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9420 as_a<rtx_insn *> (insn)->set_deleted ();
9422 /* Mark constants in a window. */
9423 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9424 insn = NEXT_INSN (insn))
9426 if (!NONJUMP_INSN_P (insn))
9427 continue;
9429 rtx pattern = PATTERN (insn);
9430 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9431 continue;
9433 switch (XINT (pattern, 1))
9435 case UNSPECV_CONST2:
9436 case UNSPECV_CONST4:
9437 case UNSPECV_CONST8:
9438 XVECEXP (pattern, 0, 1) = const1_rtx;
9439 break;
9440 case UNSPECV_WINDOW_END:
9441 if (XVECEXP (pattern, 0, 0) == x)
9442 return lab;
9443 break;
9444 case UNSPECV_CONST_END:
9445 return lab;
9446 default:
9447 break;
9451 return lab;
9454 /* Return true if it's possible to redirect BRANCH1 to the destination
9455 of an unconditional jump BRANCH2. We only want to do this if the
9456 resulting branch will have a short displacement. */
9457 static bool
9458 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9460 /* Don't follow if BRANCH2 might be a jump crossing between
9461 hot and cold partitions. */
9462 if (flag_reorder_blocks_and_partition
9463 && simplejump_p (branch2)
9464 && CROSSING_JUMP_P (branch2))
9465 return false;
9467 if (flag_expensive_optimizations && simplejump_p (branch2))
9469 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9470 rtx_insn *insn;
9471 int distance;
9473 for (distance = 0, insn = NEXT_INSN (branch1);
9474 insn && distance < 256;
9475 insn = PREV_INSN (insn))
9477 if (insn == dest)
9478 return true;
9479 else
9480 distance += get_attr_length (insn);
9482 for (distance = 0, insn = NEXT_INSN (branch1);
9483 insn && distance < 256;
9484 insn = NEXT_INSN (insn))
9486 if (insn == dest)
9487 return true;
9488 else
9489 distance += get_attr_length (insn);
9492 return false;
9495 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9496 bool
9497 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9498 unsigned int new_reg)
9500 /* Interrupt functions can only use registers that have already been
9501 saved by the prologue, even if they would normally be
9502 call-clobbered. */
9503 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9504 return false;
9506 return true;
9509 /* Function to update the integer COST
9510 based on the relationship between INSN that is dependent on
9511 DEP_INSN through the dependence LINK. The default is to make no
9512 adjustment to COST. This can be used for example to specify to
9513 the scheduler that an output- or anti-dependence does not incur
9514 the same cost as a data-dependence. The return value should be
9515 the new value for COST. */
9516 static int
9517 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9518 unsigned int)
9520 rtx reg, use_pat;
9522 if (dep_type == 0)
9524 if (recog_memoized (insn) < 0
9525 || recog_memoized (dep_insn) < 0)
9526 return cost;
9528 rtx dep_set = single_set (dep_insn);
9530 /* The latency that we specify in the scheduling description refers
9531 to the actual output, not to an auto-increment register; for that,
9532 the latency is one. */
9533 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9535 rtx set = single_set (insn);
9537 if (set
9538 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9539 && (!MEM_P (SET_DEST (set))
9540 || !reg_mentioned_p (SET_DEST (dep_set),
9541 XEXP (SET_DEST (set), 0))))
9542 cost = 1;
9544 /* The only input for a call that is timing-critical is the
9545 function's address. */
9546 if (CALL_P (insn))
9548 rtx call = get_call_rtx_from (insn);
9549 if (call
9550 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9551 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9552 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9553 cost -= TARGET_SH4_300 ? 3 : 6;
9555 /* Likewise, the most timing-critical input for an sfunc call
9556 is the function address. However, sfuncs typically start
9557 using their arguments pretty quickly.
9558 Assume a four cycle delay for SH4 before they are needed.
9559 Cached ST40-300 calls are quicker, so assume only a one
9560 cycle delay there.
9561 ??? Maybe we should encode the delays till input registers
9562 are needed by sfuncs into the sfunc call insn. */
9563 /* All sfunc calls are parallels with at least four components.
9564 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9565 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9566 && XVECLEN (PATTERN (insn), 0) >= 4
9567 && (reg = sfunc_uses_reg (insn)))
9569 if (! reg_set_p (reg, dep_insn))
9570 cost -= TARGET_SH4_300 ? 1 : 4;
9572 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9574 attr_type dep_type = get_attr_type (dep_insn);
9575 attr_type type;
9576 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9577 cost--;
9578 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9579 && (type = get_attr_type (insn)) != TYPE_CALL
9580 && type != TYPE_SFUNC)
9581 cost--;
9582 /* When the preceding instruction loads the shift amount of
9583 the following SHAD/SHLD, the latency of the load is increased
9584 by 1 cycle. */
9585 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9586 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9587 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9588 XEXP (SET_SRC (single_set (insn)),
9589 1)))
9590 cost++;
9591 /* When an LS group instruction with a latency of less than
9592 3 cycles is followed by a double-precision floating-point
9593 instruction, FIPR, or FTRV, the latency of the first
9594 instruction is increased to 3 cycles. */
9595 else if (cost < 3
9596 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9597 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9598 cost = 3;
9599 /* The lsw register of a double-precision computation is ready one
9600 cycle earlier. */
9601 else if (reload_completed
9602 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9603 && (use_pat = single_set (insn))
9604 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9605 SET_SRC (use_pat)))
9606 cost -= 1;
9608 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9609 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9610 cost -= 1;
9612 else if (TARGET_SH4_300)
9614 /* Stores need their input register two cycles later. */
9615 attr_type type;
9616 if (dep_set && cost >= 1
9617 && ((type = get_attr_type (insn)) == TYPE_STORE
9618 || type == TYPE_PSTORE
9619 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9621 rtx set = single_set (insn);
9623 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9624 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9626 cost -= 2;
9627 /* But don't reduce the cost below 1 if the address depends
9628 on a side effect of dep_insn. */
9629 if (cost < 1
9630 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9631 cost = 1;
9636 /* An anti-dependence penalty of two applies if the first insn is a double
9637 precision fadd / fsub / fmul. */
9638 else if (!TARGET_SH4_300
9639 && dep_type == REG_DEP_ANTI
9640 && recog_memoized (dep_insn) >= 0
9641 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9642 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9643 /* A lot of alleged anti-flow dependences are fake,
9644 so check this one is real. */
9645 && flow_dependent_p (dep_insn, insn))
9646 cost = 2;
9648 return cost;
9651 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9652 if DEP_INSN is anti-flow dependent on INSN. */
9653 static bool
9654 flow_dependent_p (rtx insn, rtx dep_insn)
9656 rtx tmp = PATTERN (insn);
9658 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9659 return tmp == NULL_RTX;
9662 /* A helper function for flow_dependent_p called through note_stores. */
9663 static void
9664 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9666 rtx * pinsn = (rtx *) data;
9668 if (*pinsn && reg_referenced_p (x, *pinsn))
9669 *pinsn = NULL_RTX;
9672 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9673 'special function' patterns (type sfunc) that clobber pr, but that
9674 do not look like function calls to leaf_function_p. Hence we must
9675 do this extra check. */
9676 static int
9677 sh_pr_n_sets (void)
9679 return DF_REG_DEF_COUNT (PR_REG);
9682 /* Return where to allocate pseudo for a given hard register initial
9683 value. */
9684 static rtx
9685 sh_allocate_initial_value (rtx hard_reg)
9687 if (REGNO (hard_reg) == PR_REG)
9689 if (crtl->is_leaf && ! sh_pr_n_sets ())
9690 return hard_reg;
9691 else
9692 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9695 return NULL_RTX;
9698 /* This function returns "2" to indicate dual issue for the SH4
9699 processor. To be used by the DFA pipeline description. */
9700 static int
9701 sh_issue_rate (void)
9703 if (TARGET_SUPERSCALAR)
9704 return 2;
9705 else
9706 return 1;
9709 /* Functions for ready queue reordering for sched1. */
9711 /* Get weight for mode for a set x. */
9712 static short
9713 find_set_regmode_weight (rtx x, machine_mode mode)
9715 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9716 return 1;
9717 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9719 if (REG_P (SET_DEST (x)))
9721 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9722 return 1;
9723 else
9724 return 0;
9726 return 1;
9728 return 0;
9731 /* Get regmode weight for insn. */
9732 static short
9733 find_insn_regmode_weight (rtx insn, machine_mode mode)
9735 /* Increment weight for each register born here. */
9736 rtx x = PATTERN (insn);
9737 short reg_weight = find_set_regmode_weight (x, mode);
9738 if (GET_CODE (x) == PARALLEL)
9740 int j;
9741 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9743 x = XVECEXP (PATTERN (insn), 0, j);
9744 reg_weight += find_set_regmode_weight (x, mode);
9747 /* Decrement weight for each register that dies here. */
9748 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9750 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9752 rtx note = XEXP (x, 0);
9753 if (REG_P (note) && GET_MODE (note) == mode)
9754 reg_weight--;
9757 return reg_weight;
9760 /* Calculate regmode weights for all insns of a basic block. */
9761 static void
9762 find_regmode_weight (basic_block b, machine_mode mode)
9764 rtx_insn *insn, *next_tail, *head, *tail;
9766 get_ebb_head_tail (b, b, &head, &tail);
9767 next_tail = NEXT_INSN (tail);
9769 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9771 /* Handle register life information. */
9772 if (!INSN_P (insn))
9773 continue;
9775 if (mode == SFmode)
9776 INSN_REGMODE_WEIGHT (insn, mode) =
9777 find_insn_regmode_weight (insn, mode)
9778 + 2 * find_insn_regmode_weight (insn, DFmode);
9779 else if (mode == SImode)
9780 INSN_REGMODE_WEIGHT (insn, mode) =
9781 find_insn_regmode_weight (insn, mode)
9782 + 2 * find_insn_regmode_weight (insn, DImode);
9786 /* Comparison function for ready queue sorting. */
9787 static int
9788 rank_for_reorder (const void *x, const void *y)
9790 rtx_insn *tmp = *(rtx_insn * const *) y;
9791 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9793 /* The insn in a schedule group should be issued first. */
9794 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9795 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9797 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9798 minimizes instruction movement, thus minimizing sched's effect on
9799 register pressure. */
9800 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9803 /* Resort the array A in which only the element at index N may be out of order. */
9804 static void
9805 swap_reorder (rtx_insn **a, int n)
9807 rtx_insn *insn = a[n - 1];
9808 int i = n - 2;
9810 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9812 a[i + 1] = a[i];
9813 i -= 1;
9815 a[i + 1] = insn;
9818 /* Sort the ready list by ascending priority. */
9819 static void
9820 ready_reorder (rtx_insn **ready, int nready)
9822 if (nready == 2)
9823 swap_reorder (ready, nready);
9824 else if (nready > 2)
9825 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9828 /* Count life regions of r0 for a block. */
9829 static int
9830 find_r0_life_regions (basic_block b)
9832 bool live;
9833 int set;
9834 int death = 0;
9836 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9838 set = 1;
9839 live = true;
9841 else
9843 set = 0;
9844 live = false;
9847 rtx_insn* insn = BB_HEAD (b);
9848 rtx_insn* end = BB_END (b);
9849 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9850 while (1)
9852 if (INSN_P (insn))
9854 if (find_regno_note (insn, REG_DEAD, R0_REG))
9856 death++;
9857 live = false;
9860 rtx pset;
9861 if (!live
9862 && (pset = single_set (insn))
9863 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9864 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9866 set++;
9867 live = true;
9870 if (insn == end)
9871 break;
9872 insn = NEXT_INSN (insn);
9874 return set - death;
9877 /* Calculate regmode weights for all insns of all basic blocks. */
9878 static void
9879 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9880 int verbose ATTRIBUTE_UNUSED,
9881 int old_max_uid)
9883 basic_block b;
9885 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9886 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9887 r0_life_regions = 0;
9889 FOR_EACH_BB_REVERSE_FN (b, cfun)
9891 find_regmode_weight (b, SImode);
9892 find_regmode_weight (b, SFmode);
9893 if (!reload_completed)
9894 r0_life_regions += find_r0_life_regions (b);
9897 CURR_REGMODE_PRESSURE (SImode) = 0;
9898 CURR_REGMODE_PRESSURE (SFmode) = 0;
9901 /* Cleanup. */
9902 static void
9903 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9904 int verbose ATTRIBUTE_UNUSED)
9906 if (regmode_weight[0])
9908 free (regmode_weight[0]);
9909 regmode_weight[0] = NULL;
9911 if (regmode_weight[1])
9913 free (regmode_weight[1]);
9914 regmode_weight[1] = NULL;
9918 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9919 keep count of register pressures on SImode and SFmode. */
9920 static int
9921 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9922 int sched_verbose ATTRIBUTE_UNUSED,
9923 rtx_insn *insn,
9924 int can_issue_more)
9926 if (GET_CODE (PATTERN (insn)) != USE
9927 && GET_CODE (PATTERN (insn)) != CLOBBER)
9928 cached_can_issue_more = can_issue_more - 1;
9929 else
9930 cached_can_issue_more = can_issue_more;
9932 if (reload_completed)
9933 return cached_can_issue_more;
9935 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9936 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9938 return cached_can_issue_more;
9941 static void
9942 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9943 int verbose ATTRIBUTE_UNUSED,
9944 int veclen ATTRIBUTE_UNUSED)
9946 CURR_REGMODE_PRESSURE (SImode) = 0;
9947 CURR_REGMODE_PRESSURE (SFmode) = 0;
9950 /* Some magic numbers. */
9951 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9952 functions that already have high pressure on r0. */
9953 #define R0_MAX_LIFE_REGIONS 2
9954 /* Register Pressure thresholds for SImode and SFmode registers. */
9955 #define SIMODE_MAX_WEIGHT 5
9956 #define SFMODE_MAX_WEIGHT 10
9958 /* Return true if the pressure is high for MODE. */
9959 static bool
9960 high_pressure (machine_mode mode)
9962 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9963 functions that already have high pressure on r0. */
9964 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9965 return true;
9967 if (mode == SFmode)
9968 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9969 else
9970 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9973 /* Reorder ready queue if register pressure is high. */
9974 static int
9975 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9976 int sched_verbose ATTRIBUTE_UNUSED,
9977 rtx_insn **ready,
9978 int *n_readyp,
9979 int clock_var ATTRIBUTE_UNUSED)
9981 if (reload_completed)
9982 return sh_issue_rate ();
9984 if (high_pressure (SFmode) || high_pressure (SImode))
9986 ready_reorder (ready, *n_readyp);
9989 return sh_issue_rate ();
9992 /* Skip cycles if the current register pressure is high. */
9993 static int
9994 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9995 int sched_verbose ATTRIBUTE_UNUSED,
9996 rtx_insn **ready ATTRIBUTE_UNUSED,
9997 int *n_readyp ATTRIBUTE_UNUSED,
9998 int clock_var ATTRIBUTE_UNUSED)
10000 if (reload_completed)
10001 return cached_can_issue_more;
10003 if (high_pressure(SFmode) || high_pressure (SImode))
10004 skip_cycles = 1;
10006 return cached_can_issue_more;
10009 /* Skip cycles without sorting the ready queue. This will move insns from
10010 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
10011 queue by sh_reorder. */
10013 /* Generally, skipping this many cycles is sufficient for all insns to move
10014 from Q -> R. */
10015 #define MAX_SKIPS 8
10017 static int
10018 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10019 int sched_verbose ATTRIBUTE_UNUSED,
10020 rtx_insn *insn ATTRIBUTE_UNUSED,
10021 int last_clock_var,
10022 int clock_var,
10023 int *sort_p)
10025 if (reload_completed)
10026 return 0;
10028 if (skip_cycles)
10030 if ((clock_var - last_clock_var) < MAX_SKIPS)
10032 *sort_p = 0;
10033 return 1;
10035 /* If this is the last cycle we are skipping, allow reordering of R. */
10036 if ((clock_var - last_clock_var) == MAX_SKIPS)
10038 *sort_p = 1;
10039 return 1;
10043 skip_cycles = 0;
10045 return 0;
10048 static bool
10049 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10051 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10055 On the SH1..SH4, the trampoline looks like
10056 2 0002 D202 mov.l l2,r2
10057 1 0000 D301 mov.l l1,r3
10058 3 0004 422B jmp @r2
10059 4 0006 0009 nop
10060 5 0008 00000000 l1: .long area
10061 6 000c 00000000 l2: .long function
10063 FDPIC needs a form that includes a function descriptor and
10064 code to load the GOT register:
10065 0 0000 00000000 .long l0
10066 1 0004 00000000 .long gotval
10067 2 0008 D302 l0: mov.l l1,r3
10068 3 000a D203 mov.l l2,r2
10069 4 000c 6122 mov.l @r2,r1
10070 5 000e 5C21 mov.l @(4,r2),r12
10071 6 0010 412B jmp @r1
10072 7 0012 0009 nop
10073 8 0014 00000000 l1: .long area
10074 9 0018 00000000 l2: .long function
10076 SH5 (compact) uses r1 instead of r3 for the static chain. */
10078 /* Emit insns to store a value at memory address + offset. */
10079 static void
10080 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10082 gcc_assert ((offset & 3) == 0);
10083 emit_move_insn (offset == 0
10084 ? change_address (addr, SImode, NULL_RTX)
10085 : adjust_address (addr, SImode, offset), value);
10088 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10089 static void
10090 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10092 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10093 ? (w0 | (w1 << 16))
10094 : (w1 | (w0 << 16)), SImode));
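/* For example (illustrative): sh_emit_storehi (mem, 0, 0xd202, 0xd301)
   stores the SImode value 0xd301d202 on little-endian targets and
   0xd202d301 on big-endian ones, so that in both cases the 16-bit word
   0xd202 ends up at offset 0 and 0xd301 at offset 2.  */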
10097 /* Emit RTL insns to initialize the variable parts of a trampoline.
10098 FNADDR is an RTX for the address of the function's pure code.
10099 CXT is an RTX for the static chain value for the function. */
10100 static void
10101 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10103 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10104 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10106 if (TARGET_FDPIC)
10108 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10110 sh_emit_storesi (tramp_mem, 0, a);
10111 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10113 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10114 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10115 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10117 sh_emit_storesi (tramp_mem, 20, cxt);
10118 sh_emit_storesi (tramp_mem, 24, fnaddr);
10120 else
10122 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10123 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10125 sh_emit_storesi (tramp_mem, 8, cxt);
10126 sh_emit_storesi (tramp_mem, 12, fnaddr);
10128 if (TARGET_HARD_SH4)
10130 if (!TARGET_INLINE_IC_INVALIDATE
10131 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10132 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10133 FUNCTION_ORDINARY).sym,
10134 LCT_NORMAL, VOIDmode, tramp, SImode);
10135 else
10136 emit_insn (gen_ic_invalidate_line (tramp));
10140 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10141 static rtx
10142 sh_trampoline_adjust_address (rtx tramp)
10144 return tramp;
10147 /* If PIC, we cannot make sibling calls to global functions
10148 because the PLT requires r12 to be live. */
10149 static bool
10150 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10152 return (1
10153 && ! sh_cfun_interrupt_handler_p ()
10154 && (! flag_pic || TARGET_FDPIC
10155 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10156 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10159 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10160 void
10161 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10163 const_tree decl = SYMBOL_REF_DECL (sym);
10164 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10166 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10167 emit_insn (gen_sym_label2reg (reg, sym, lab));
10168 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10169 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10170 else
10171 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10174 /* Machine specific built-in functions. */
10176 struct builtin_description
10178 bool (* const is_enabled) (void);
10179 const enum insn_code icode;
10180 const char *const name;
10181 int signature;
10182 tree fndecl;
10185 /* This function can be used if there are any built-ins that are not for
10186 SHmedia. It's commented out to avoid the defined-but-unused warning. */
10187 static bool
10188 sh1_builtin_p (void)
10190 return TARGET_SH1;
10193 /* Describe number and signedness of arguments; arg[0] == result
10194 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10195 /* 9: 64-bit pointer, 10: 32-bit pointer */
10196 static const char signature_args[][4] =
10198 #define SH_BLTIN_V2SI2 0
10199 { 4, 4 },
10200 #define SH_BLTIN_V4HI2 1
10201 { 4, 4 },
10202 #define SH_BLTIN_V2SI3 2
10203 { 4, 4, 4 },
10204 #define SH_BLTIN_V4HI3 3
10205 { 4, 4, 4 },
10206 #define SH_BLTIN_V8QI3 4
10207 { 4, 4, 4 },
10208 #define SH_BLTIN_MAC_HISI 5
10209 { 1, 4, 4, 1 },
10210 #define SH_BLTIN_SH_HI 6
10211 { 4, 4, 1 },
10212 #define SH_BLTIN_SH_SI 7
10213 { 4, 4, 1 },
10214 #define SH_BLTIN_V4HI2V2SI 8
10215 { 4, 4, 4 },
10216 #define SH_BLTIN_V4HI2V8QI 9
10217 { 4, 4, 4 },
10218 #define SH_BLTIN_SISF 10
10219 { 4, 2 },
10220 #define SH_BLTIN_LDUA_L 11
10221 { 2, 10 },
10222 #define SH_BLTIN_LDUA_Q 12
10223 { 1, 10 },
10224 #define SH_BLTIN_STUA_L 13
10225 { 0, 10, 2 },
10226 #define SH_BLTIN_STUA_Q 14
10227 { 0, 10, 1 },
10228 #define SH_BLTIN_LDUA_L64 15
10229 { 2, 9 },
10230 #define SH_BLTIN_LDUA_Q64 16
10231 { 1, 9 },
10232 #define SH_BLTIN_STUA_L64 17
10233 { 0, 9, 2 },
10234 #define SH_BLTIN_STUA_Q64 18
10235 { 0, 9, 1 },
10236 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10237 #define SH_BLTIN_2 19
10238 #define SH_BLTIN_SU 19
10239 { 1, 2 },
10240 #define SH_BLTIN_3 20
10241 #define SH_BLTIN_SUS 20
10242 { 2, 2, 1 },
10243 #define SH_BLTIN_PSSV 21
10244 { 0, 8, 2, 2 },
10245 #define SH_BLTIN_XXUU 22
10246 #define SH_BLTIN_UUUU 22
10247 { 1, 1, 1, 1 },
10248 #define SH_BLTIN_PV 23
10249 { 0, 8 },
10250 #define SH_BLTIN_VP 24
10251 { 8, 0 },
10252 #define SH_BLTIN_UV 25
10253 { 1, 0 },
10254 #define SH_BLTIN_VU 26
10255 { 0, 1 },
10257 /* mcmv: operands considered unsigned. */
10258 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10259 /* mperm: control value considered unsigned int. */
10260 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10261 /* mshards_q: returns signed short. */
10262 /* nsb: takes long long arg, returns unsigned char. */
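/* Reading the table (illustrative): __builtin_sh_get_fpscr below uses
   SH_BLTIN_UV = { 1, 0 }, i.e. an unsigned result and no arguments
   (roughly "unsigned int __builtin_sh_get_fpscr (void)"), while
   __builtin_sh_set_fpscr uses SH_BLTIN_VU = { 0, 1 }: no result, one
   unsigned argument.  */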
10263 static struct builtin_description bdesc[] =
10265 { sh1_builtin_p,
10266 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10267 { sh1_builtin_p,
10268 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
10271 static tree sh_builtin_get_fpscr;
10272 static tree sh_builtin_set_fpscr;
10274 static void
10275 sh_init_builtins (void)
10277 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10278 memset (shared, 0, sizeof shared);
10280 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10282 builtin_description* d = &bdesc[di];
10284 if (!d->is_enabled ())
10285 continue;
10287 tree type, arg_type = NULL_TREE;
10288 int signature = d->signature;
10290 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10291 type = shared[signature];
10292 else
10294 int has_result = signature_args[signature][0] != 0;
10295 tree args[3];
10297 if (! TARGET_FPU_ANY
10298 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10299 continue;
10300 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10301 args[i] = NULL_TREE;
10302 for (int i = 3; ; i--)
10304 int arg = signature_args[signature][i];
10305 int opno = i - 1 + has_result;
10307 if (arg & 8)
10308 arg_type = ptr_type_node;
10309 else if (arg)
10310 arg_type = (*lang_hooks.types.type_for_mode)
10311 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10312 else if (i)
10313 continue;
10314 else
10315 arg_type = void_type_node;
10316 if (i == 0)
10317 break;
10318 args[i-1] = arg_type;
10320 type = build_function_type_list (arg_type, args[0], args[1],
10321 args[2], NULL_TREE);
10322 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10323 shared[signature] = type;
10325 d->fndecl =
10326 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10327 NULL, NULL_TREE);
10328 /* Recode {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10329 if (d->icode == CODE_FOR_sts_fpscr)
10330 sh_builtin_get_fpscr = d->fndecl;
10331 else if (d->icode == CODE_FOR_set_fpscr)
10332 sh_builtin_set_fpscr = d->fndecl;
10336 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
10338 static void
10339 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
10341 const unsigned SH_FE_INVALID = 64;
10342 const unsigned SH_FE_DIVBYZERO = 32;
10343 const unsigned SH_FE_OVERFLOW = 16;
10344 const unsigned SH_FE_UNDERFLOW = 8;
10345 const unsigned SH_FE_INEXACT = 4;
10346 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10347 | SH_FE_DIVBYZERO
10348 | SH_FE_OVERFLOW
10349 | SH_FE_UNDERFLOW
10350 | SH_FE_INEXACT);
10351 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
10352 tree fenv_var, mask, ld_fenv, masked_fenv;
10353 tree new_fenv_var, reload_fenv, restore_fnenv;
10354 tree update_call, atomic_feraiseexcept, hold_fnclex;
10356 if (! TARGET_FPU_ANY)
10357 return;
10359 /* Generate the equivalent of :
10360 unsigned int fenv_var;
10361 fenv_var = __builtin_sh_get_fpscr ();
10363 unsigned int masked_fenv;
10364 masked_fenv = fenv_var & mask;
10366 __builtin_sh_set_fpscr (masked_fenv); */
10368 fenv_var = create_tmp_var_raw (unsigned_type_node);
10369 mask = build_int_cst (unsigned_type_node,
10370 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10371 | SH_FE_ALL_EXCEPT));
10372 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10373 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10374 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10375 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10376 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10377 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10378 ld_fenv),
10379 NULL_TREE, NULL_TREE);
10380 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10382 /* Store the value of masked_fenv to clear the exceptions:
10383 __builtin_sh_set_fpscr (masked_fenv); */
10385 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10387 /* Generate the equivalent of :
10388 unsigned int new_fenv_var;
10389 new_fenv_var = __builtin_sh_get_fpscr ();
10391 __builtin_sh_set_fpscr (fenv_var);
10393 __atomic_feraiseexcept (new_fenv_var); */
10395 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10396 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10397 build_call_expr (sh_builtin_get_fpscr, 0));
10398 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10399 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10400 update_call = build_call_expr (atomic_feraiseexcept, 1,
10401 fold_convert (integer_type_node,
10402 new_fenv_var));
10403 *update = build2 (COMPOUND_EXPR, void_type_node,
10404 build2 (COMPOUND_EXPR, void_type_node,
10405 reload_fenv, restore_fnenv), update_call);
10408 /* Implements target hook vector_mode_supported_p. */
10409 bool
10410 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10412 return false;
10415 bool
10416 sh_frame_pointer_required (void)
10418 /* If needed override this in other tm.h files to cope with various OS
10419 lossage requiring a frame pointer. */
10420 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10421 return true;
10423 if (crtl->profile)
10424 return true;
10426 return false;
10429 /* Implements target hook dwarf_calling_convention. Return an enum
10430 of dwarf_calling_convention. */
10432 sh_dwarf_calling_convention (const_tree func)
10434 if (sh_attr_renesas_p (func))
10435 return DW_CC_GNU_renesas_sh;
10437 return DW_CC_normal;
10440 /* Returns the sh builtin decl for CODE. */
10441 static tree
10442 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10444 if (code >= ARRAY_SIZE (bdesc))
10445 return error_mark_node;
10447 if (!bdesc[code].is_enabled ())
10448 return error_mark_node;
10450 return bdesc[code].fndecl;
10453 /* Expand an expression EXP that calls a built-in function,
10454 with result going to TARGET if that's convenient
10455 (and in mode MODE if that's convenient).
10456 SUBTARGET may be used as the target for computing one of EXP's operands.
10457 IGNORE is nonzero if the value is to be ignored. */
10458 static rtx
10459 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10460 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10462 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10463 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10464 const struct builtin_description *d = &bdesc[fcode];
10465 enum insn_code icode = d->icode;
10466 int signature = d->signature;
10467 int nop = 0;
10468 rtx op[4];
10470 if (signature_args[signature][0])
10472 if (ignore)
10473 return NULL_RTX;
10475 machine_mode tmode = insn_data[icode].operand[0].mode;
10476 if (! target || GET_MODE (target) != tmode
10477 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10478 target = gen_reg_rtx (tmode);
10479 op[nop++] = target;
10481 else
10482 target = NULL_RTX;
10484 for (int i = 1; i <= 3; i++, nop++)
10486 if (! signature_args[signature][i])
10487 break;
10488 tree arg = CALL_EXPR_ARG (exp, i - 1);
10489 if (arg == error_mark_node)
10490 return const0_rtx;
10492 machine_mode opmode;
10493 tree optype;
10494 if (signature_args[signature][i] & 8)
10496 opmode = ptr_mode;
10497 optype = ptr_type_node;
10499 else
10501 opmode = insn_data[icode].operand[nop].mode;
10502 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10505 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10506 if (argmode != opmode)
10507 arg = build1 (NOP_EXPR, optype, arg);
10508 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10509 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10510 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10513 rtx pat = NULL_RTX;
10515 switch (nop)
10517 case 1:
10518 pat = (*insn_data[d->icode].genfun) (op[0]);
10519 break;
10520 case 2:
10521 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10522 break;
10523 case 3:
10524 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10525 break;
10526 case 4:
10527 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10528 break;
10529 default:
10530 gcc_unreachable ();
10532 if (! pat)
10533 return NULL_RTX;
10534 emit_insn (pat);
10535 return target;
10538 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10539 UNITS_PER_WORD bytes wide. */
10541 static unsigned int
10542 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10544 if (XD_REGISTER_P (regno))
10545 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10546 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
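/* Illustrative example (assuming the usual 4-byte UNITS_PER_WORD on SH):
   a DFmode value (8 bytes) occupies CEIL (8, 4) = 2 general registers,
   but only CEIL (8, 8) = 1 of the double-width XD registers.  */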
10549 /* Implement TARGET_HARD_REGNO_MODE_OK.
10551 We can allow any mode in any general register. The special registers
10552 only allow SImode. Don't allow any mode in the PR.
10554 We cannot hold DCmode values in the XD registers because alter_reg
10555 handles subregs of them incorrectly. We could work around this by
10556 spacing the XD registers like the DR registers, but this would require
10557 additional memory in every compilation to hold larger register vectors.
10558 We could hold SFmode / SCmode values in XD registers, but that
10559 would require a tertiary reload when reloading from / to memory,
10560 and a secondary reload to reload from / to general regs; that
10561 seems to be a losing proposition.
10563 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10564 it won't be ferried through GP registers first. */
10565 static bool
10566 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10568 if (SPECIAL_REGISTER_P (regno))
10569 return mode == SImode;
10571 if (regno == FPUL_REG)
10572 return (mode == SImode || mode == SFmode);
10574 if (FP_REGISTER_P (regno) && mode == SFmode)
10575 return true;
10577 if (mode == V2SFmode)
10579 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10580 || GENERAL_REGISTER_P (regno)))
10581 return true;
10582 else
10583 return false;
10586 if (mode == V4SFmode)
10588 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10589 || GENERAL_REGISTER_P (regno))
10590 return true;
10591 else
10592 return false;
10595 if (mode == V16SFmode)
10596 return regno == FIRST_XD_REG;
10598 if (FP_REGISTER_P (regno))
10600 if (mode == SFmode
10601 || mode == SImode
10602 || ((TARGET_SH2E) && mode == SCmode)
10603 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10604 && ((regno - FIRST_FP_REG) & 1) == 0)
10605 || (TARGET_SH4 && mode == TImode
10606 && ((regno - FIRST_FP_REG) & 3) == 0))
10607 return true;
10608 else
10609 return false;
10612 if (XD_REGISTER_P (regno))
10613 return mode == DFmode;
10615 if (regno == PR_REG)
10616 return mode == SImode;
10618 if (regno == FPSCR_REG)
10619 return mode == SImode;
10621 return true;
10624 /* Implement TARGET_MODES_TIEABLE_P.
10626 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10627 and MODE2, for any hard reg, then this must be false for correct output.
10628 That's the case for xd registers: we don't hold SFmode values in
10629 them, so we can't tie an SFmode pseudo with one in another
10630 floating-point mode. */
10632 static bool
10633 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10635 return (mode1 == mode2
10636 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10637 && (mode1 != SFmode && mode2 != SFmode)));
10640 /* Specify the modes required to caller save a given hard regno.
10641 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10642 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10643 permits integer modes on them. That makes LRA's split process
10644 unhappy. See PR55212.
10646 machine_mode
10647 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10648 machine_mode mode)
10650 if (FP_REGISTER_P (regno)
10651 && (mode == SFmode
10652 || mode == SCmode
10653 || ((mode == DFmode || mode == DCmode)
10654 && ((regno - FIRST_FP_REG) & 1) == 0)))
10655 return mode;
10657 return choose_hard_reg_mode (regno, nregs, false);
10660 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10661 static bool
10662 sh_can_change_mode_class (machine_mode from, machine_mode to,
10663 reg_class_t rclass)
10665 /* We want to enable the use of SUBREGs as a means to
10666 VEC_SELECT a single element of a vector. */
10668 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10669 This can be problematic when SFmode vector subregs need to be accessed
10670 on the stack with displacement addressing, as it happens with -O0.
10671 Thus we disallow the mode change for -O0. */
10672 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10673 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10675 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10677 if (TARGET_LITTLE_ENDIAN)
10679 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10680 return !reg_classes_intersect_p (DF_REGS, rclass);
10682 else
10684 if (GET_MODE_SIZE (from) < 8)
10685 return !reg_classes_intersect_p (DF_REGS, rclass);
10688 return true;
10691 /* Return true if registers in machine mode MODE will likely be
10692 allocated to registers in small register classes. */
10693 bool
10694 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10696 return true;
10699 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10700 that label is used. */
10701 void
10702 sh_mark_label (rtx address, int nuses)
10704 if (GOTOFF_P (address))
10706 /* Extract the label or symbol. */
10707 address = XEXP (address, 0);
10708 if (GET_CODE (address) == PLUS)
10709 address = XEXP (address, 0);
10710 address = XVECEXP (address, 0, 0);
10712 if (GET_CODE (address) == LABEL_REF
10713 && LABEL_P (XEXP (address, 0)))
10714 LABEL_NUSES (XEXP (address, 0)) += nuses;
10717 /* Compute extra cost of moving data between one register class
10718 and another.
10720 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10721 uses this information. Hence, the general register <-> floating point
10722 register information here is not used for SFmode. */
10723 static int
10724 sh_register_move_cost (machine_mode mode,
10725 reg_class_t srcclass, reg_class_t dstclass)
10727 if (dstclass == T_REGS || dstclass == PR_REGS)
10728 return 10;
10730 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10731 return 4;
10733 if (mode == SImode && TARGET_FMOVD
10734 && REGCLASS_HAS_FP_REG (srcclass)
10735 && REGCLASS_HAS_FP_REG (dstclass))
10736 return 4;
10738 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10739 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10741 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10742 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10743 return 9;
10745 if ((REGCLASS_HAS_FP_REG (dstclass)
10746 && REGCLASS_HAS_GENERAL_REG (srcclass))
10747 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10748 && REGCLASS_HAS_FP_REG (srcclass)))
10750 /* Discourage trying to use fp regs for a pointer. This also
10751 discourages fp regs with SImode because Pmode is an alias
10752 of SImode on this target. See PR target/48596. */
10753 int addend = (mode == Pmode) ? 40 : 0;
10755 return ((TARGET_FMOVD ? 8 : 12) + addend)
10756 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10759 if ((dstclass == FPUL_REGS
10760 && REGCLASS_HAS_GENERAL_REG (srcclass))
10761 || (srcclass == FPUL_REGS
10762 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10763 return 5;
10765 if ((dstclass == FPUL_REGS
10766 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10767 || (srcclass == FPUL_REGS
10768 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10769 return 7;
10771 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10772 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10773 return 4;
10775 if (TARGET_FMOVD
10776 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10777 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10778 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10780 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
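/* Cost example (illustrative): moving an SImode pointer (Pmode) between a
   general register and an FP register hits the addend of 40 above, giving
   (8 + 40) * 1 = 48 with TARGET_FMOVD and (12 + 40) * 1 = 52 without,
   whereas a plain general <-> general SImode move falls through to the
   default 2 * 1 = 2.  */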
10783 static rtx
10784 emit_load_ptr (rtx reg, rtx addr)
10786 rtx mem = gen_const_mem (ptr_mode, addr);
10788 if (Pmode != ptr_mode)
10789 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10790 return emit_move_insn (reg, mem);
10793 static void
10794 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10795 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10796 tree function)
10798 CUMULATIVE_ARGS cum;
10799 int structure_value_byref = 0;
10800 rtx this_rtx, this_value, sibcall, funexp;
10801 rtx_insn *insns;
10802 tree funtype = TREE_TYPE (function);
10803 int simple_add = CONST_OK_FOR_ADD (delta);
10804 int did_load = 0;
10805 rtx scratch0, scratch1, scratch2;
10807 reload_completed = 1;
10808 epilogue_completed = 1;
10809 crtl->uses_only_leaf_regs = 1;
10811 emit_note (NOTE_INSN_PROLOGUE_END);
10813 /* Find the "this" pointer. We have such a wide range of ABIs for the
10814 SH that it's best to do this completely machine independently.
10815 "this" is passed as first argument, unless a structure return pointer
10816 comes first, in which case "this" comes second. */
10817 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10818 #ifndef PCC_STATIC_STRUCT_RETURN
10819 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10820 structure_value_byref = 1;
10821 #endif /* not PCC_STATIC_STRUCT_RETURN */
10822 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10824 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10826 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
10828 this_rtx
10829 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
10831 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10832 static chain pointer (even if you can't have nested virtual functions
10833 right now, someone might implement them sometime), and the rest of the
10834 registers are used for argument passing, are callee-saved, or reserved. */
10835 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10836 -ffixed-reg has been used. */
10837 if (! call_used_regs[0] || fixed_regs[0])
10838 error ("r0 needs to be available as a call-clobbered register");
10839 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10842 if (call_used_regs[1] && ! fixed_regs[1])
10843 scratch1 = gen_rtx_REG (ptr_mode, 1);
10844 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10845 pointing where to return struct values. */
10846 if (call_used_regs[3] && ! fixed_regs[3])
10847 scratch2 = gen_rtx_REG (Pmode, 3);
10850 this_value = plus_constant (Pmode, this_rtx, delta);
10851 if (vcall_offset
10852 && (simple_add || scratch0 != scratch1)
10853 && strict_memory_address_p (ptr_mode, this_value))
10855 emit_load_ptr (scratch0, this_value);
10856 did_load = 1;
10859 if (!delta)
10860 ; /* Do nothing. */
10861 else if (simple_add)
10862 emit_move_insn (this_rtx, this_value);
10863 else
10865 emit_move_insn (scratch1, GEN_INT (delta));
10866 emit_insn (gen_add2_insn (this_rtx, scratch1));
10869 if (vcall_offset)
10871 rtx offset_addr;
10873 if (!did_load)
10874 emit_load_ptr (scratch0, this_rtx);
10876 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10877 if (strict_memory_address_p (ptr_mode, offset_addr))
10878 ; /* Do nothing. */
10879 else if (scratch0 != scratch1)
10881 /* scratch0 != scratch1, and we have indexed loads. Get better
10882 schedule by loading the offset into r1 and using an indexed
10883 load - then the load of r1 can issue before the load from
10884 (this_rtx + delta) finishes. */
10885 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10886 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10888 else if (CONST_OK_FOR_ADD (vcall_offset))
10890 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10891 offset_addr = scratch0;
10893 else if (scratch0 != scratch1)
10895 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10896 emit_insn (gen_add2_insn (scratch0, scratch1));
10897 offset_addr = scratch0;
10899 else
10900 gcc_unreachable (); /* FIXME */
10901 emit_load_ptr (scratch0, offset_addr);
10903 if (Pmode != ptr_mode)
10904 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10905 emit_insn (gen_add2_insn (this_rtx, scratch0));
10908 /* Generate a tail call to the target function. */
10909 if (! TREE_USED (function))
10911 assemble_external (function);
10912 TREE_USED (function) = 1;
10914 funexp = XEXP (DECL_RTL (function), 0);
10915 /* If the function is overridden, so is the thunk, hence we don't
10916 need GOT addressing even if this is a public symbol. */
10917 #if 0
10918 if (TARGET_SH1 && ! flag_weak)
10919 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10920 else
10921 #endif
10922 if (TARGET_SH2 && flag_pic)
10924 if (TARGET_FDPIC)
10926 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10927 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10929 else
10931 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10932 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10935 else
10937 emit_move_insn (scratch2, funexp);
10938 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10939 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10941 sibcall = emit_call_insn (sibcall);
10942 SIBLING_CALL_P (sibcall) = 1;
10943 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10944 emit_barrier ();
10946 /* Run just enough of rest_of_compilation to do scheduling and get
10947 the insns emitted. Note that use_thunk calls
10948 assemble_start_function and assemble_end_function. */
10950 insns = get_insns ();
10952 if (optimize > 0)
10954 if (! cfun->cfg)
10955 init_flow (cfun);
10956 split_all_insns_noflow ();
10959 sh_reorg ();
10960 shorten_branches (insns);
10961 final_start_function (insns, file, 1);
10962 final (insns, file, 1);
10963 final_end_function ();
10965 reload_completed = 0;
10966 epilogue_completed = 0;
10969 /* Return an RTX pair for the address and call site label of a function
10970 NAME of kind KIND, placing the result in TARGET if not NULL. For
10971 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10972 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10973 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10974 address of the function itself, not a function descriptor, so they
10975 can only be used with functions not using the FDPIC register that
10976 are known to be called directly without a PLT entry. */
10978 function_symbol_result
10979 function_symbol (rtx target, const char *name, sh_function_kind kind)
10981 /* If this is not an ordinary function, the name usually comes from a
10982 string literal or an sprintf buffer. Make sure we use the same
10983 string consistently, so that cse will be able to unify address loads. */
10984 if (kind != FUNCTION_ORDINARY)
10985 name = IDENTIFIER_POINTER (get_identifier (name));
10986 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
10987 rtx lab = const0_rtx;
10988 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10989 if (flag_pic)
10990 switch (kind)
10992 case FUNCTION_ORDINARY:
10993 break;
10994 case SFUNC_GOT:
10996 rtx reg = target ? target : gen_reg_rtx (Pmode);
10998 emit_insn (gen_symGOT2reg (reg, sym));
10999 sym = reg;
11000 break;
11002 case SFUNC_STATIC:
11004 rtx reg = target ? target : gen_reg_rtx (Pmode);
11006 if (TARGET_FDPIC)
11008 /* We use PC-relative calls, since GOTOFF can only refer
11009 to writable data. This works along with sh_sfunc_call. */
11010 lab = PATTERN (gen_call_site ());
11011 emit_insn (gen_sym_label2reg (reg, sym, lab));
11013 else
11015 /* ??? To allow cse to work, we use GOTOFF relocations.
11016 We could add combiner patterns to transform this into
11017 straight pc-relative calls with sym2PIC / bsrf when
11018 label load and function call are still 1:1 and in the
11019 same basic block during combine. */
11020 emit_insn (gen_symGOTOFF2reg (reg, sym));
11023 sym = reg;
11024 break;
11027 if (target && sym != target)
11029 emit_move_insn (target, sym);
11030 return function_symbol_result (target, lab);
11032 return function_symbol_result (sym, lab);
11035 /* Find the number of the first general purpose register in S that
11036 is set. */
11037 static int
11038 scavenge_reg (HARD_REG_SET *s)
11040 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11041 if (TEST_HARD_REG_BIT (*s, r))
11042 return r;
11043 return -1;
11047 sh_get_pr_initial_val (void)
11049 /* If we haven't finished rtl generation, there might be a nonlocal label
11050 that we haven't seen yet.
11051 ??? get_hard_reg_initial_val fails if it is called after register
11052 allocation has started, unless it has been called before for the
11053 same register. And even then, we end up in trouble if we didn't use
11054 the register in the same basic block before. So call
11055 get_hard_reg_initial_val now and wrap it in an unspec if we might
11056 need to replace it. */
11057 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11058 combine can put the pseudo returned by get_hard_reg_initial_val into
11059 instructions that need a general purpose register, which will fail to
11060 be recognized when the pseudo becomes allocated to PR. */
11061 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
11062 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11065 bool
11066 sh_expand_t_scc (rtx operands[])
11068 enum rtx_code code = GET_CODE (operands[1]);
11069 rtx target = operands[0];
11070 rtx op0 = operands[2];
11071 rtx op1 = operands[3];
11072 rtx result = target;
11074 if (!REG_P (op0) || REGNO (op0) != T_REG
11075 || !CONST_INT_P (op1))
11076 return false;
11077 if (!REG_P (result))
11078 result = gen_reg_rtx (SImode);
11079 HOST_WIDE_INT val = INTVAL (op1);
11080 if ((code == EQ && val == 1) || (code == NE && val == 0))
11081 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11082 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11083 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11084 else if (code == EQ || code == NE)
11085 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11086 else
11087 return false;
11088 if (result != target)
11089 emit_move_insn (target, result);
11090 return true;
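/* For instance (illustrative): an scc like "r4 = (T == 1)" -- operands[1]
   being EQ against const_int 1 with operands[2] the T register -- expands
   to a single movt, while "r4 = (T == 0)" takes the movnegt path.  */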
11093 /* INSN is an sfunc; return the rtx that describes the address used. */
11094 static rtx
11095 extract_sfunc_addr (rtx insn)
11097 rtx pattern = PATTERN (insn);
11098 const int len = XVECLEN (pattern, 0);
11099 for (int i = 0; i < len; i++)
11101 rtx part = XVECEXP (pattern, 0, i);
11102 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11103 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11104 return XEXP (part, 0);
11106 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11107 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11110 /* Verify that the register in use_sfunc_addr still agrees with the address
11111 used in the sfunc. This prevents fill_slots_from_thread from changing
11112 use_sfunc_addr.
11113 INSN is the use_sfunc_addr instruction, and REG is the register it
11114 guards. */
11115 bool
11116 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11118 /* Search for the sfunc. It should really come right after INSN. */
11119 while ((insn = NEXT_INSN (insn)))
11121 if (LABEL_P (insn) || JUMP_P (insn))
11122 break;
11123 if (! INSN_P (insn))
11124 continue;
11126 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11127 insn = seq->insn (0);
11128 if (GET_CODE (PATTERN (insn)) != PARALLEL
11129 || get_attr_type (insn) != TYPE_SFUNC)
11130 continue;
11131 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11133 gcc_unreachable ();
11136 /* This function returns a constant rtx that represents 2**15 / pi in
11137 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
11138 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
11139 static GTY(()) rtx sh_fsca_sf2int_rtx;
11142 sh_fsca_sf2int (void)
11144 if (! sh_fsca_sf2int_rtx)
11146 REAL_VALUE_TYPE rv;
11148 real_from_string (&rv, "10430.378350470453");
11149 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11152 return sh_fsca_sf2int_rtx;
11155 /* This function returns a constant rtx that represents pi / 2**15 in
11156 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11157 of a full circle back to an SFmode angle in radians, i.e. 0x10000
11158 maps to 2*pi. */
11159 static GTY(()) rtx sh_fsca_int2sf_rtx;
11162 sh_fsca_int2sf (void)
11164 if (! sh_fsca_int2sf_rtx)
11166 REAL_VALUE_TYPE rv;
11168 real_from_string (&rv, "9.587379924285257e-5");
11169 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11172 return sh_fsca_int2sf_rtx;
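/* For reference, both scale factors follow from the fsca fixed-point angle
   format, in which 0x10000 represents a full circle:
     2**15 / pi = 32768 / 3.14159265...  ~= 10430.378350470453
     pi / 2**15 = 3.14159265... / 32768  ~= 9.587379924285257e-5
   so multiplying radians by the first constant and the result by the second
   constant round-trips back to the original angle.  */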
11175 /* Initialize the CUMULATIVE_ARGS structure. */
11176 void
11177 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11178 tree fntype,
11179 rtx libname ATTRIBUTE_UNUSED,
11180 tree fndecl,
11181 signed int n_named_args,
11182 machine_mode mode)
11184 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11185 pcum->free_single_fp_reg = 0;
11186 pcum->outgoing = n_named_args != -1;
11188 /* FIXME: Should we check TARGET_HITACHI here ??? */
11189 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11191 if (fntype)
11193 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11194 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11195 pcum->prototype_p = prototype_p (fntype);
11196 pcum->arg_count [(int) SH_ARG_INT] = false;
11198 else
11200 pcum->arg_count [(int) SH_ARG_INT] = 0;
11201 pcum->prototype_p = false;
11202 if (mode != VOIDmode)
11204 /* If the default ABI is the Renesas ABI then all library
11205 calls must assume that the library will be using the
11206 Renesas ABI. So if the function would return its result
11207 in memory then we must force the address of this memory
11208 block onto the stack. Ideally we would like to call
11209 targetm.calls.return_in_memory() here but we do not have
11210 the TYPE or the FNDECL available so we synthesize the
11211 contents of that function as best we can. */
11212 pcum->force_mem =
11213 (TARGET_DEFAULT & MASK_HITACHI)
11214 && (mode == BLKmode
11215 || (GET_MODE_SIZE (mode) > 4
11216 && !(mode == DFmode
11217 && TARGET_FPU_DOUBLE)));
11219 else
11220 pcum->force_mem = false;
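/* A worked example of the synthesized check above (illustrative only):
   a libcall whose result mode is SCmode (8 bytes) under a Renesas default
   ABI exceeds 4 bytes and is not DFmode, so force_mem is set and the
   address of the result block is forced onto the stack; a DFmode result
   with TARGET_FPU_DOUBLE, or any result of at most 4 bytes, is returned
   in registers.  */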
11225 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11227 enum rtx_code code = TRUNCATE;
11229 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11231 rtx inner = XEXP (x, 0);
11232 machine_mode inner_mode = GET_MODE (inner);
11234 if (inner_mode == mode)
11235 return inner;
11236 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11237 x = inner;
11238 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11239 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11241 code = GET_CODE (x);
11242 x = inner;
11245 return gen_rtx_fmt_e (code, mode, x);
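/* Schematic examples of the above (illustrative): truncating
   (sign_extend:DI (reg:SI r1)) to SImode yields just (reg:SI r1);
   truncating (sign_extend:DI (reg:HI r1)) to SImode with need_sign_ext
   yields (sign_extend:SI (reg:HI r1)); anything else is simply wrapped in
   (truncate:SI ...).  */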
11248 /* Load and store depend on the highpart of the address. However,
11249 set_attr_alternative does not give well-defined results before reload,
11250 so we must look at the rtl ourselves to see if any of the feeding
11251 registers is used in a memref.
11253 Return true iff INSN contains a MEM. */
11254 bool
11255 sh_contains_memref_p (rtx insn)
11257 subrtx_iterator::array_type array;
11258 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11259 if (MEM_P (*iter))
11260 return true;
11261 return false;
11264 /* Return true iff INSN loads a banked register. */
11265 bool
11266 sh_loads_bankedreg_p (rtx insn)
11268 if (GET_CODE (PATTERN (insn)) == SET)
11270 rtx op = SET_DEST (PATTERN(insn));
11271 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11272 return true;
11275 return false;
11278 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11279 static reg_class_t
11280 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11282 return rclass;
11285 /* Implement TARGET_SECONDARY_RELOAD. */
11286 static reg_class_t
11287 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11288 machine_mode mode, secondary_reload_info *sri)
11290 enum reg_class rclass = (enum reg_class) rclass_i;
11292 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11293 && REG_P (XEXP (XEXP (x, 0), 0))
11294 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11295 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11297 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11298 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11300 if (REG_P (x) && REGNO (x) == GBR_REG)
11301 return NO_REGS;
11303 if (in_p)
11305 if (REGCLASS_HAS_FP_REG (rclass)
11306 && immediate_operand ((x), mode)
11307 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11308 switch (mode)
11310 case E_SFmode:
11311 sri->icode = CODE_FOR_reload_insf__frn;
11312 return NO_REGS;
11313 case E_DFmode:
11314 sri->icode = CODE_FOR_reload_indf__frn;
11315 return NO_REGS;
11316 case E_SImode:
11317 /* ??? If we knew that we are in the appropriate mode -
11318 single precision - we could use a reload pattern directly. */
11319 return FPUL_REGS;
11320 default:
11321 abort ();
11323 if (rclass == FPUL_REGS
11324 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11325 || REGNO (x) == T_REG))
11326 || GET_CODE (x) == PLUS))
11327 return GENERAL_REGS;
11328 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11330 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11331 return GENERAL_REGS;
11332 else if (mode == SFmode)
11333 return FP_REGS;
11334 sri->icode = CODE_FOR_reload_insi__i_fpul;
11335 return NO_REGS;
11337 if (rclass == FPSCR_REGS
11338 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11339 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11340 return GENERAL_REGS;
11341 } /* end of input-only processing. */
11343 if (((REGCLASS_HAS_FP_REG (rclass)
11344 && (REG_P (x)
11345 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11346 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11347 && TARGET_FMOVD))))
11348 || (REGCLASS_HAS_GENERAL_REG (rclass)
11349 && REG_P (x)
11350 && FP_REGISTER_P (REGNO (x))))
11351 && (mode == SFmode || mode == SImode))
11352 return FPUL_REGS;
11353 if ((rclass == FPUL_REGS
11354 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11355 && (MEM_P (x)
11356 || (REG_P (x)
11357 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11358 || REGNO (x) == T_REG
11359 || system_reg_operand (x, VOIDmode)))))
11361 if (rclass == FPUL_REGS)
11362 return GENERAL_REGS;
11363 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
11366 if ((rclass == MAC_REGS || rclass == PR_REGS)
11367 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11368 && rclass != REGNO_REG_CLASS (REGNO (x)))
11369 return GENERAL_REGS;
11371 /* If we get here, fall back to loading the FPUL register through general registers.
11372 This case can happen when movsi_ie insn is picked initially to
11373 load/store the FPUL register from/to another register, and then the
11374 other register is allocated on the stack. */
11375 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11376 return GENERAL_REGS;
11378 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11379 the other operand.
11380 On SH2A we could also just leave it alone here, which would result in a
11381 4 byte move insn being generated instead. However, for this to work
11382 the insns must have the appropriate alternatives. */
11383 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11384 && satisfies_constraint_Sdd (x)
11385 && sh_disp_addr_displacement (x)
11386 <= sh_max_mov_insn_displacement (mode, false))
11387 return R0_REGS;
11389 /* When reload is trying to address a QImode or HImode subreg on the stack,
11390 force any subreg byte into R0_REGS, as this is going to become a
11391 displacement address.
11392 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11393 is on the stack, the memref to it might already require a displacement
11394 and that has to be added to the final address. At this point we don't
11395 know the cumulative displacement so we assume the worst case. */
11396 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11397 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11398 return R0_REGS;
11400 return NO_REGS;
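/* A typical case handled above, sketched for illustration: reloading an
   SImode or SFmode value between a general register and a floating-point
   register returns FPUL_REGS, so the allocator inserts the intermediate
   FPUL copy and the final code goes through FPUL (lds/sts, flds/fsts)
   rather than attempting a direct general <-> FP move, which SH does not
   provide.  */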
11403 /* Return true if SUBST can't safely replace its equivalent during RA. */
11404 static bool
11405 sh_cannot_substitute_mem_equiv_p (rtx)
11407 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11408 uses R0 and may cause spill failure when R0 is already used.
11409 We have to return true for that case at least.
11410 Moreover, SH has strong R0 parity and also does not have enough hard
11411 registers to make the equiv substitution win in size and speed on
11412 average working sets. The pseudos produced to hold the equiv values
11413 can't get good hard registers in the bad cases and end up in memory
11414 save/restore insns, which makes the code worse.
11415 return true;
11418 /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
11419 static bool
11420 sh_legitimize_address_displacement (rtx *offset1, rtx *offset2,
11421 poly_int64 orig_offset,
11422 machine_mode mode)
11424 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11425 || (TARGET_SH2E && mode == SFmode))
11426 return false;
11428 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset);
11429 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11431 *offset1 = adj.offset_adjust;
11432 *offset2 = adj.mov_disp;
11433 return true;
11436 return false;
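/* For illustration (the exact split is computed by
   sh_find_mov_disp_adjust): a QImode access at offset 100 does not fit the
   4-bit displacement field of mov.b (0..15 bytes), so the offset may be
   split into an adjustment added to the base register (*offset1) and a
   small remainder kept in the mem (*offset2), e.g. 100 = 96 + 4.  */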
11439 /* Return true if the movsf insn should be split with an additional
11440 register. */
11441 bool
11442 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11444 /* op0 == op1 */
11445 if (rtx_equal_p (op0, op1))
11446 return true;
11447 /* fy, FQ, reg */
11448 if (GET_CODE (op1) == CONST_DOUBLE
11449 && ! satisfies_constraint_G (op1)
11450 && ! satisfies_constraint_H (op1)
11451 && REG_P (op0)
11452 && REG_P (op2))
11453 return true;
11454 /* f, r, y */
11455 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11456 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11457 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11458 return true;
11459 /* r, f, y */
11460 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11461 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11462 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11463 return true;
11465 return false;
11468 static void
11469 sh_conditional_register_usage (void)
11471 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11472 if (! VALID_REGISTER_P (regno))
11473 fixed_regs[regno] = call_used_regs[regno] = 1;
11474 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11475 if (flag_pic)
11477 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11478 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11480 if (TARGET_FDPIC)
11482 fixed_regs[PIC_REG] = 1;
11483 call_used_regs[PIC_REG] = 1;
11484 call_really_used_regs[PIC_REG] = 1;
11486 /* Renesas saves and restores mac registers on call. */
11487 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11489 call_really_used_regs[MACH_REG] = 0;
11490 call_really_used_regs[MACL_REG] = 0;
11493 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11494 if (! fixed_regs[regno] && call_really_used_regs[regno])
11495 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11497 call_really_used_regs[FPSCR_MODES_REG] = 0;
11498 call_really_used_regs[FPSCR_STAT_REG] = 0;
11501 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11503 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11504 static bool
11505 sh_legitimate_constant_p (machine_mode mode, rtx x)
11507 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11509 rtx base, offset;
11510 split_const (x, &base, &offset);
11512 if (GET_CODE (base) == SYMBOL_REF
11513 && !offset_within_block_p (base, INTVAL (offset)))
11514 return false;
11517 if (TARGET_FDPIC
11518 && (SYMBOLIC_CONST_P (x)
11519 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11520 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11521 return false;
11523 return GET_CODE (x) != CONST_DOUBLE
11524 || mode == DFmode || mode == SFmode
11525 || mode == DImode || GET_MODE (x) == VOIDmode;
11528 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11530 static void
11531 sh_init_sync_libfuncs (void)
11533 init_sync_libfuncs (UNITS_PER_WORD);
11536 /* Return true if it is appropriate to emit `ret' instructions in the
11537 body of a function. */
11538 bool
11539 sh_can_use_simple_return_p (void)
11541 if (! reload_completed || frame_pointer_needed)
11542 return false;
11544 /* Moving the prologue around doesn't reduce the size. */
11545 if (optimize_function_for_size_p (cfun))
11546 return false;
11548 /* Finally, allow for pr save. */
11549 HARD_REG_SET live_regs_mask;
11550 int d = calc_live_regs (&live_regs_mask);
11552 if (rounded_frame_size (d) > 4)
11553 return false;
11555 return true;
11558 /*------------------------------------------------------------------------------
11559 Address mode optimization support code
11562 typedef HOST_WIDE_INT disp_t;
11563 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11564 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11565 static const disp_t INVALID_DISP = MAX_DISP;
11567 /* A memory reference which is described by a base register and a
11568 displacement. */
11569 class base_reg_disp
11571 public:
11572 base_reg_disp (rtx br, disp_t d);
11574 bool is_reg (void) const;
11575 bool is_disp (void) const;
11576 rtx reg (void) const;
11577 disp_t disp (void) const;
11579 private:
11580 rtx reg_;
11581 disp_t disp_;
11584 inline
11585 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11586 : reg_ (br), disp_ (d)
11590 inline bool
11591 base_reg_disp::is_reg (void) const
11593 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11596 inline bool
11597 base_reg_disp::is_disp (void) const
11599 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11602 inline rtx
11603 base_reg_disp::reg (void) const
11605 return reg_;
11608 inline disp_t
11609 base_reg_disp::disp (void) const
11611 return disp_;
11614 /* Find the base register and calculate the displacement for a given
11615 address rtx 'x'. */
11616 static base_reg_disp
11617 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11618 rtx base_reg = NULL)
11620 if (REG_P (x))
11622 if (REGNO (x) == GBR_REG)
11623 return base_reg_disp (x, disp);
11625 /* We've reached a hard-reg. This is probably the point where
11626 function args are copied to pseudos. Do not go any further and
11627 stick to the pseudo. If the original mem addr was in a hard reg
11628 from the beginning, it will become the base reg. */
11629 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11630 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11632 /* Find the def of the reg and trace it. If there is more than one
11633 def and they are not all the same, assume it's not safe to proceed. */
11634 rtx_insn* last_i = NULL;
11635 rtx last_set = NULL;
11636 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11637 d = DF_REF_NEXT_REG (d))
11639 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11641 /* Accept multiple defs, as long as they are equal. */
11642 if (last_set == NULL || rtx_equal_p (last_set, set))
11644 last_i = DF_REF_INSN (d);
11645 last_set = set;
11647 else
11649 last_i = NULL;
11650 last_set = NULL;
11651 break;
11655 if (last_set != NULL && last_i != NULL)
11656 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11657 XEXP (last_set, 0));
11659 /* When here, no previous insn was found that sets the reg.
11660 The input reg is already the base reg. */
11661 return base_reg_disp (x, disp);
11664 else if (GET_CODE (x) == PLUS)
11666 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11667 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11669 /* Either left or right val must be a reg.
11670 We don't handle the case of 'reg + reg' here. */
11671 if (left_val.is_reg () && right_val.is_disp ())
11672 return base_reg_disp (left_val.reg (), left_val.disp ()
11673 + right_val.disp () + disp);
11674 else if (right_val.is_reg () && left_val.is_disp ())
11675 return base_reg_disp (right_val.reg (), right_val.disp ()
11676 + left_val.disp () + disp);
11677 else
11678 return base_reg_disp (base_reg, disp);
11681 else if (CONST_INT_P (x))
11682 return base_reg_disp (NULL, disp + INTVAL (x));
11684 /* Didn't find anything useful. */
11685 return base_reg_disp (base_reg, disp);
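/* Schematic example of the def-chain walk above (pseudo RTL, illustrative):
   given
     r100 = (reg:SI GBR)
     r101 = (plus:SI (reg:SI r100) (const_int 20))
     ... (mem:SI (plus:SI (reg:SI r101) (const_int 4))) ...
   the recursion resolves the address to base register GBR with a total
   displacement of 24, which sh_find_equiv_gbr_addr below can then turn into
   a @(disp,GBR) address if the displacement is in range.  */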
11688 /* Given an insn and a memory operand, try to find an equivalent GBR
11689 based memory address and return the corresponding new memory address.
11690 Return NULL_RTX if not found. */
11692 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11694 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11695 return NULL_RTX;
11697 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11698 if (side_effects_p (XEXP (mem, 0)))
11699 return NULL_RTX;
11701 /* When not optimizing there might be no dataflow available. */
11702 if (df == NULL)
11703 return NULL_RTX;
11705 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11707 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11709 /* If GBR is marked as call clobbered we bail out if we see a call.
11710 FIXME: Actually should check if this mem refers to the gbr value
11711 before or after the call. If there is a store_gbr preceding this
11712 mem, it's safe to use GBR for this mem.
11714 If GBR is not marked as call clobbered, but there is some other
11715 def than a call, it's probably a load_gbr upon which we also
11716 bail out to be on the safe side.
11717 FIXME: Should check if we have a use-after-def case, such as
11718 the call case above. */
11719 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11720 d = DF_REF_NEXT_REG (d))
11722 if (CALL_P (DF_REF_INSN (d)))
11724 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
11725 return NULL_RTX;
11726 else
11727 continue;
11729 else
11730 return NULL_RTX;
11733 rtx disp = GEN_INT (gbr_disp.disp ());
11734 if (gbr_displacement (disp, GET_MODE (mem)))
11735 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11738 return NULL_RTX;
11741 /*------------------------------------------------------------------------------
11742 Manual insn combine support code.
11745 /* Return true if the specified insn contains any UNSPECs or
11746 UNSPEC_VOLATILEs. */
11747 static bool
11748 sh_unspec_insn_p (rtx x)
11750 subrtx_iterator::array_type array;
11751 FOR_EACH_SUBRTX (i, array, x, ALL)
11752 if (*i != NULL
11753 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11754 return true;
11756 return false;
11759 /* Return true if the register operands of the specified insn are modified
11760 between the specified from and to insns (exclusive of those two). */
11761 bool
11762 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11763 const rtx_insn* from,
11764 const rtx_insn* to)
11766 /* FIXME: Return true for multiple sets for now. */
11767 rtx s = single_set (operands_insn);
11768 if (s == NULL_RTX)
11769 return true;
11771 subrtx_iterator::array_type array;
11772 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11773 if (*i != NULL &&
11774 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11775 return true;
11777 return false;
11780 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11781 negates the T bit and stores the result in the T bit. */
11782 bool
11783 sh_is_nott_insn (const rtx_insn* i)
11785 return i != NULL && GET_CODE (PATTERN (i)) == SET
11786 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11787 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11791 sh_movt_set_dest (const rtx_insn* i)
11793 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11797 sh_movt_set_dest (const_rtx pat)
11799 return GET_CODE (pat) == SET
11800 && arith_reg_dest (XEXP (pat, 0), SImode)
11801 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11804 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11805 that stores the negated T bit in a register, and return the destination
11806 register rtx, or null. */
11808 sh_movrt_set_dest (const rtx_insn* i)
11810 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11814 sh_movrt_set_dest (const_rtx pat)
11816 /* The negc movrt replacement is inside a parallel. */
11817 if (GET_CODE (pat) == PARALLEL)
11818 pat = XVECEXP (pat, 0, 0);
11820 return GET_CODE (pat) == SET
11821 && arith_reg_dest (XEXP (pat, 0), SImode)
11822 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11826 /* Given an insn and a reg number, tell whether the reg dies or is unused
11827 after the insn. */
11828 bool
11829 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11831 return find_regno_note (i, REG_DEAD, regno) != NULL
11832 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11835 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11836 mark it as being used after the insn. */
11837 void
11838 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11840 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11841 remove_note (i, n);
11842 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11843 remove_note (i, n);
11846 /* Given an insn check if it contains any post/pre inc/dec mem operands and
11847 add the REG_INC notes accordingly.
11848 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
11849 FIXME: This function is currently used by peephole2 patterns because
11850 the peephole2 pass does not preserve REG_INC notes. If the notes
11851 are dropped the following passes will do wrong things. */
11852 rtx_insn*
11853 sh_check_add_incdec_notes (rtx_insn* i)
11855 struct for_each_inc_dec_clb
11857 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11858 rtx dest, rtx src ATTRIBUTE_UNUSED,
11859 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11861 gcc_assert (REG_P (dest));
11863 rtx_insn* i = (rtx_insn*)arg;
11864 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11865 add_reg_note (i, REG_INC, dest);
11867 return 0;
11871 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11872 return i;
11875 /* Given a move insn destination and a source, make sure that the move source
11876 operand is not a post-inc mem load with the same address reg as the
11877 destination. Returns the modified source operand with the post-inc removed
11878 if necessary. */
11880 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11882 if (!MEM_P (src))
11883 return src;
11885 rtx addr = XEXP (src, 0);
11887 if (GET_CODE (addr) == POST_INC
11888 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11889 return replace_equiv_address (src, XEXP (addr, 0));
11891 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11892 return src;
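/* Schematic example (illustrative): for dst = (reg:SI r1) and
   src = (mem:SI (post_inc:SI (reg:SI r1))) the post-increment would be lost
   as soon as the load overwrites r1, so the source is rewritten to the
   plain (mem:SI (reg:SI r1)).  */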
11895 /* Emit a move insn that is safe to be used in peephole patterns. */
11896 rtx_insn*
11897 sh_peephole_emit_move_insn (rtx dst, rtx src)
11899 return sh_check_add_incdec_notes (
11900 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11903 /* Given an op rtx and an insn, try to find out whether the result of the
11904 specified op consists only of logical operations on T bit stores. */
11905 bool
11906 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11908 if (!logical_operator (op, SImode))
11909 return false;
11911 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11912 int op_is_t_count = 0;
11914 for (int i = 0; i < 2; ++i)
11916 if (t_reg_operand (ops[i], VOIDmode)
11917 || negt_reg_operand (ops[i], VOIDmode))
11918 op_is_t_count++;
11920 else
11922 set_of_reg op_set = sh_find_set_of_reg
11923 (ops[i], insn, prev_nonnote_nondebug_insn_bb);
11924 if (op_set.set_src == NULL_RTX)
11925 continue;
11927 if (t_reg_operand (op_set.set_src, VOIDmode)
11928 || negt_reg_operand (op_set.set_src, VOIDmode)
11929 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11930 op_is_t_count++;
11934 return op_is_t_count == 2;
11937 /* Given the operand that is extended in a sign/zero extend insn, and the
11938 insn, try to figure out whether the sign/zero extension can be replaced
11939 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11940 NULL_RTX otherwise. */
11942 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11944 if (REG_P (extended_op))
11945 extended_op = extended_op;
11946 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11947 extended_op = SUBREG_REG (extended_op);
11948 else
11949 return NULL_RTX;
11951 /* Reg moves must be of the same mode. */
11952 if (GET_MODE (extended_op) != SImode)
11953 return NULL_RTX;
11955 set_of_reg s = sh_find_set_of_reg (extended_op, insn,
11956 prev_nonnote_nondebug_insn_bb);
11957 if (s.set_src == NULL_RTX)
11958 return NULL_RTX;
11960 if (t_reg_operand (s.set_src, VOIDmode)
11961 || negt_reg_operand (s.set_src, VOIDmode))
11962 return extended_op;
11964 /* If the zero extended reg was formed by a logical operation, check the
11965 operands of the logical operation. If both originated from T bit
11966 stores the zero extension can be eliminated. */
11967 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11968 return extended_op;
11970 return NULL_RTX;
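/* Schematic example for the two functions above (illustrative):
     r1 = T        (a movt)
     r2 = !T       (a negated T bit store)
     r3 = r1 | r2
   Both ior operands originate from T bit stores, so r3 can only be 0 or 1,
   and a later zero extension of its QImode/HImode lowpart can be replaced
   by a plain copy of r3.  */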
11973 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11974 figure out whether it should be converted into a movt-xor sequence in
11975 the movrt_negc splitter.
11976 Returns true if insns have been modified and the splitter has succeeded. */
11977 bool
11978 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11980 /* In cases such as
11981 tst r4,r4
11982 mov #-1,r1
11983 negc r1,r1
11984 tst r4,r4
11985 we can replace the T bit clobbering negc with a movt-xor sequence and
11986 eliminate the redundant comparison.
11987 Because the xor insn depends on register allocation results, allow this
11988 only before reload. */
11989 if (!can_create_pseudo_p ())
11990 return false;
11992 set_of_reg t_before_negc = sh_find_set_of_reg
11993 (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb);
11994 set_of_reg t_after_negc = sh_find_set_of_reg
11995 (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb);
11997 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
11998 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
11999 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
12000 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
12001 t_before_negc.insn,
12002 t_after_negc.insn)
12003 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
12004 && !sh_unspec_insn_p (t_after_negc.insn)
12005 && !volatile_insn_p (PATTERN (t_after_negc.insn))
12006 && !side_effects_p (PATTERN (t_after_negc.insn))
12007 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
12009 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
12010 set_insn_deleted (t_after_negc.insn);
12011 return true;
12013 else
12014 return false;
12017 /* Given a reg and the current insn, see if the value of the reg originated
12018 from a sign or zero extension and return the discovered information. */
12019 sh_extending_set_of_reg
12020 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
12022 if (reg == NULL)
12023 return sh_extending_set_of_reg (curr_insn);
12025 if (SUBREG_P (reg))
12026 reg = SUBREG_REG (reg);
12028 if (!REG_P (reg))
12029 return sh_extending_set_of_reg (curr_insn);
12031 /* FIXME: Also search the predecessor basic blocks. It seems that checking
12032 only the adjacent predecessor blocks would cover most of the cases.
12033 Also try to look through the first extension that we hit. There are some
12034 cases where a zero_extend is followed by an (implicit) sign_extend, and it
12035 fails to see the sign_extend. */
12036 sh_extending_set_of_reg result = sh_find_set_of_reg
12037 (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true);
12039 if (result.set_src != NULL)
12041 if (GET_CODE (result.set_src) == SIGN_EXTEND
12042 || GET_CODE (result.set_src) == ZERO_EXTEND)
12044 if (dump_file)
12045 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12046 "explicitly sign/zero extended in insn %d\n",
12047 REGNO (reg), INSN_UID (result.insn));
12048 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
12049 result.ext_code = GET_CODE (result.set_src);
12051 else if (MEM_P (result.set_src)
12052 && (GET_MODE (result.set_src) == QImode
12053 || GET_MODE (result.set_src) == HImode)
12054 && !sh_unspec_insn_p (result.insn))
12056 /* On SH QIHImode memory loads always sign extend. However, in
12057 some cases where it seems that the higher bits are not
12058 interesting, the loads will not be expanded as sign extending
12059 insns, but as QIHImode loads into QIHImode regs. We report that
12060 the reg has been sign extended by the mem load. When it is used
12061 as such, we must convert the mem load into a sign extending insn,
12062 see also sh_extending_set_of_reg::use_as_extended_reg. */
12063 if (dump_file)
12064 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12065 "implicitly sign extended in insn %d\n",
12066 REGNO (reg), INSN_UID (result.insn));
12067 result.from_mode = GET_MODE (result.set_src);
12068 result.ext_code = SIGN_EXTEND;
12072 return result;
12075 /* Given a reg that is known to be sign or zero extended at some insn,
12076 take the appropriate measures so that the extended value can be used as
12077 a reg at the specified insn and return the resulting reg rtx. */
12079 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
12081 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
12082 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
12083 gcc_assert (from_mode == QImode || from_mode == HImode);
12085 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
12087 if (dump_file)
12088 fprintf (dump_file,
12089 "use_as_extended_reg: converting non-extending mem load in "
12090 "insn %d into sign-extending load\n", INSN_UID (insn));
12092 rtx r = gen_reg_rtx (SImode);
12093 rtx_insn* i0;
12094 if (from_mode == QImode)
12095 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
12096 else if (from_mode == HImode)
12097 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
12098 else
12099 gcc_unreachable ();
12101 emit_insn_after (
12102 gen_move_insn (XEXP (set_rtx, 0),
12103 gen_lowpart (GET_MODE (set_src), r)), i0);
12104 set_insn_deleted (insn);
12105 return r;
12107 else
12109 rtx extension_dst = XEXP (set_rtx, 0);
12110 if (GET_MODE (extension_dst) != SImode)
12111 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12112 GET_MODE (extension_dst), 0);
12113 if (modified_between_p (extension_dst, insn, use_at_insn))
12115 if (dump_file)
12116 fprintf (dump_file,
12117 "use_as_extended_reg: dest reg %d of extending insn %d is "
12118 "modified, inserting a reg-reg copy\n",
12119 REGNO (extension_dst), INSN_UID (insn));
12121 rtx r = gen_reg_rtx (SImode);
12122 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12123 return r;
12125 else
12127 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12128 return extension_dst;
12133 bool
12134 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12136 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12137 && (from_mode == QImode || from_mode == HImode)
12138 && set_src != NULL)
12139 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12140 else
12141 return false;
12145 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12147 gcc_assert (can_use_as_unextended_reg ());
12149 rtx r = XEXP (set_src, 0);
12150 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12152 if (modified_between_p (r, insn, use_at_insn))
12154 rtx r1 = gen_reg_rtx (SImode);
12155 emit_insn_after (gen_move_insn (r1, r0), insn);
12156 return r1;
12158 else
12160 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12161 ? REGNO (SUBREG_REG (r))
12162 : REGNO (r));
12163 return r0;
12167 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12168 perform the necessary checks on the operands and split it accordingly. */
12169 void
12170 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12171 int subreg_offset, rtx operands[])
12173 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12175 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12176 curr_insn);
12177 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12178 curr_insn);
12180 /* If one of the operands is known to be zero extended, that's already
12181 sufficient to mask out the unwanted high bits. */
12182 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12184 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12185 operands[1]));
12186 return;
12188 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12190 emit_insn (gen_tstsi_t (operands[0],
12191 eop1.use_as_extended_reg (curr_insn)));
12192 return;
12195 /* None of the operands seem to be zero extended.
12196 If both are sign extended it's OK, too. */
12197 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12198 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12200 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12201 eop1.use_as_extended_reg (curr_insn)));
12202 return;
12205 /* Otherwise we have to insert a zero extension on one of the operands to
12206 mask out the unwanted high bits.
12207 Prefer the operand that has no known extension. */
12208 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12209 std::swap (operands[0], operands[1]);
12211 rtx tmp0 = gen_reg_rtx (SImode);
12212 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12213 GET_MODE (operands[0]), subreg_offset);
12214 emit_insn (subreg_mode == QImode
12215 ? gen_zero_extendqisi2 (tmp0, tmp1)
12216 : gen_zero_extendhisi2 (tmp0, tmp1));
12217 emit_insn (gen_tstsi_t (tmp0, operands[1]));
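/* Schematic example (illustrative): for a QImode subreg test where neither
   operand is known to be extended, the operand without any known extension
   is zero extended into a fresh pseudo first, roughly
     tmp0 = zero_extend (subreg:QI r4)
     T    = (tmp0 & r5) == 0
   which masks out the high bits that the SImode tst would otherwise see.  */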
12220 /* A helper class to increment/decrement a counter variable each time a
12221 function is entered/left. */
12222 class scope_counter
12224 public:
12225 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12227 ~scope_counter (void)
12229 --m_counter;
12230 gcc_assert (m_counter >= 0);
12233 int count (void) const { return m_counter; }
12235 private:
12236 int& m_counter;
12239 /* Given an rtx x, determine whether the expression can be used to create
12240 an insn that calculates x and stores the result in the T bit.
12241 This is used by the 'treg_set_expr' predicate to construct insns sequences
12242 where T bit results are fed into other insns, such as addc, subc, negc
12243 insns.
12245 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12246 distinguish between 'positive' and 'negative' forms. For now this has to
12247 be done in the preparation code. We could also introduce
12248 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12249 two different patterns for the 'positive' and 'negative' forms. However,
12250 the total number of lines of code seems to be about the same and the
12251 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12252 recog function would need to look inside the expression by temporarily
12253 splitting it. */
12254 static int sh_recog_treg_set_expr_reent_count = 0;
12256 bool
12257 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12259 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12261 /* Limit the recursion count to avoid nested expressions which we can't
12262 resolve to a single treg set insn. */
12263 if (recursion.count () > 1)
12264 return false;
12266 /* Early accept known possible operands before doing recog. */
12267 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12268 || negt_reg_operand (op, mode))
12269 return true;
12271 /* Early reject impossible operands before doing recog.
12272 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12273 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12274 such as lower-subreg will bail out. Some insns such as SH4A movua are
12275 done with UNSPEC, so must reject those, too, or else it would result
12276 in an invalid reg -> treg move. */
12277 if (CONST_INT_P (op) || register_operand (op, mode)
12278 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12279 return false;
12281 if (!can_create_pseudo_p ())
12282 return false;
12284 /* expand_debug_locations may call this to compute rtx costs at
12285 a very early stage. In that case, don't make new insns here to
12286 avoid codegen differences with -g. */
12287 if (currently_expanding_to_rtl)
12288 return false;
12290 /* We are going to invoke recog in a re-entrant way and thus
12291 have to capture its current state and restore it afterwards. */
12292 recog_data_d prev_recog_data = recog_data;
12294 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12295 SET_PREV_INSN (i) = NULL;
12296 SET_NEXT_INSN (i) = NULL;
12298 /* If the comparison op doesn't have a result mode, set it to SImode. */
12299 machine_mode prev_op_mode = GET_MODE (op);
12300 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12301 PUT_MODE (op, SImode);
12303 int result = recog (PATTERN (i), i, 0);
12305 /* It seems there is no insn like that. Create a negated version and
12306 try again. If we hit a negated form, we'll allow that and append a
12307 nott sequence when splitting out the insns. Insns that do the split
12308 can then remove the trailing nott if they know how to deal with it. */
12309 if (result < 0 && COMPARISON_P (op))
12311 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12312 if (cmp_mode == VOIDmode)
12313 cmp_mode = GET_MODE (XEXP (op, 1));
12315 rtx_code prev_code = GET_CODE (op);
12316 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12317 result = recog (PATTERN (i), i, 0);
12318 PUT_CODE (op, prev_code);
12321 PUT_MODE (op, prev_op_mode);
12322 recog_data = prev_recog_data;
12323 return result >= 0;
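/* For illustration (assuming the usual SH comparison patterns): an operand
   such as (lt:SI (reg r4) (reg r5)) has no matching cmp instruction, so the
   first recog attempt fails; the reversed condition (ge:SI ...) does match
   cmp/ge, so the expression is accepted here and later split with a
   trailing nott that undoes the reversal.  */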
12326 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12327 This can be used as a condition for insn/split patterns to allow certain
12328 T bit setting patterns only to be matched as subexpressions of other
12329 patterns. */
12330 bool
12331 sh_in_recog_treg_set_expr (void)
12333 return sh_recog_treg_set_expr_reent_count > 0;
12336 /* Given an rtx x, which is assumed to be some expression that has been
12337 matched by the 'treg_set_expr' predicate before, split and emit the
12338 insns that are necessary to calculate the expression and store the result
12339 in the T bit.
12340 The splitting is done recursively, similarly to 'try_split' in emit-rtl.c.
12341 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12342 'delete_insn' which then causes the DF parts to bail out, because we
12343 currently are inside another gen_split* function and would invoke
12344 'try_split' in a reentrant way. */
12345 static std::pair<rtx_insn*, rtx_insn*>
12346 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12348 if (dump_file)
12350 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12351 print_rtl_single (dump_file, i);
12352 fprintf (dump_file, "\n");
12355 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12357 if (seq == NULL)
12358 return std::make_pair (i, i);
12360 /* Avoid infinite splitter loops if any insn of the result matches
12361 the original pattern. */
12362 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12363 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12364 return std::make_pair (i, i);
12366 unshare_all_rtl_in_chain (seq);
12368 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12369 a linked list, replace the single insn with the new insns. */
12370 rtx_insn* seqlast = seq;
12371 while (NEXT_INSN (seqlast) != NULL)
12372 seqlast = NEXT_INSN (seqlast);
12374 if (rtx_insn* iprev = PREV_INSN (i))
12375 SET_NEXT_INSN (iprev) = seq;
12376 if (rtx_insn* inext = NEXT_INSN (i))
12377 SET_PREV_INSN (inext) = seqlast;
12379 SET_PREV_INSN (seq) = PREV_INSN (i);
12380 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12382 SET_PREV_INSN (i) = NULL;
12383 SET_NEXT_INSN (i) = NULL;
12385 /* Recursively split all insns. */
12386 for (i = seq; ; i = NEXT_INSN (i))
12388 std::pair<rtx_insn*, rtx_insn*> ii =
12389 sh_try_split_insn_simple (i, curr_insn, n + 1);
12390 if (i == seq)
12391 seq = ii.first;
12392 if (i == seqlast)
12394 seqlast = ii.second;
12395 break;
12397 i = ii.first;
12400 return std::make_pair (seq, seqlast);
12403 sh_treg_insns
12404 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12406 if (t_reg_operand (x, VOIDmode))
12407 return sh_treg_insns ();
12409 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12411 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12412 SET_PREV_INSN (i) = NULL;
12413 SET_NEXT_INSN (i) = NULL;
12415 if (dump_file)
12417 fprintf (dump_file, "split_treg_set_expr insn:\n");
12418 print_rtl (dump_file, i);
12419 fprintf (dump_file, "\n");
12422 /* If the insn is not found, we will try a negated form and append
12423 a nott. */
12424 bool append_nott = false;
12426 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12427 have to capture its current state and restore it afterwards. */
12428 recog_data_d prev_recog_data = recog_data;
12430 if (negt_reg_operand (x, GET_MODE (x)))
12432 /* This is a normal movt followed by a nott. It will be converted
12433 into a movrt after initial expansion. */
12434 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12435 append_nott = true;
12437 else
12439 /* If the comparison op doesn't have a mode set, set it to SImode. */
12440 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12441 PUT_MODE (x, SImode);
12443 int insn_code = recog (PATTERN (i), i, 0);
12445 if (insn_code < 0 && COMPARISON_P (x))
12447 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12448 if (cmp_mode == VOIDmode)
12449 cmp_mode = GET_MODE (XEXP (x, 1));
12451 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12452 insn_code = recog (PATTERN (i), i, 0);
12453 append_nott = true;
12456 gcc_assert (insn_code >= 0);
12459 /* Try to recursively split the insn. Some insns might refuse to split
12460 any further while we are in the treg_set_expr splitting phase. They
12461 will be emitted as part of the outer insn and then split again. */
12462 std::pair<rtx_insn*, rtx_insn*> insnlist =
12463 sh_try_split_insn_simple (i, curr_insn);
12465 /* Restore recog state. */
12466 recog_data = prev_recog_data;
12468 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12469 ? insnlist.second
12470 : NULL;
12471 if (dump_file)
12473 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12474 print_rtl (dump_file, insnlist.first);
12475 fprintf (dump_file, "\n");
12477 if (nott_insn != NULL)
12478 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12481 emit_insn (insnlist.first);
12483 if (nott_insn != NULL && append_nott)
12485 if (dump_file)
12486 fprintf (dump_file, "removing trailing nott\n");
12487 remove_insn (nott_insn);
12488 nott_insn = NULL;
12489 append_nott = false;
12492 if (append_nott)
12493 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12495 rtx_insn* first_insn = get_insns ();
12497 if (dump_file)
12499 fprintf (dump_file, "resulting insns:\n");
12500 print_rtl (dump_file, first_insn);
12501 fprintf (dump_file, "\n");
12504 return sh_treg_insns (first_insn, nott_insn);
12507 /*------------------------------------------------------------------------------
12508 Mode switching support code.
12511 static void
12512 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12513 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12515 if ((TARGET_SH4A_FP || TARGET_SH4_300)
12516 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12518 emit_insn (gen_toggle_pr ());
12519 if (TARGET_FMOVD)
12520 emit_insn (gen_toggle_sz ());
12522 else if (mode != FP_MODE_NONE)
12524 rtx tmp = gen_reg_rtx (SImode);
12525 emit_insn (gen_sts_fpscr (tmp));
12526 rtx i = NULL;
12528 const unsigned HOST_WIDE_INT fpbits =
12529 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12531 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12532 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12533 else if (mode == FP_MODE_SINGLE)
12534 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12535 else if (mode == FP_MODE_DOUBLE)
12536 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12537 else
12538 gcc_unreachable ();
12540 emit_insn (i);
12541 emit_insn (gen_lds_fpscr (tmp));
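/* Worked example of the read-modify-write path above (illustrative): to
   enter FP_MODE_DOUBLE when the previous mode is unknown, FPSCR is copied
   into a temporary with sts fpscr, the PR bit (and SZ with -mfmovd) is set
   by the ior, and the result is written back with lds fpscr; when the
   previous mode is known to differ, a single xor of the same bits toggles
   the precision instead.  */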
12545 static int
12546 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12548 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12551 static int
12552 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12554 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
12555 get_attr_fp_set (insn) != FP_SET_NONE)
12556 return (int) get_attr_fp_set (insn);
12557 else
12558 return mode;
12561 static int
12562 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12564 return NORMAL_MODE (entity);
12567 static int
12568 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12570 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12573 static int
12574 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12576 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12579 /*------------------------------------------------------------------------------
12580 Misc
12583 /* Return true if we use LRA instead of reload pass. */
12584 bool
12585 sh_lra_p (void)
12587 return sh_lra_flag;
12590 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12592 static bool
12593 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12594 unsigned int align,
12595 enum by_pieces_operation op,
12596 bool speed_p)
12598 switch (op)
12600 case MOVE_BY_PIECES:
12601 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12602 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12603 case STORE_BY_PIECES:
12604 case SET_BY_PIECES:
12605 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12606 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12607 default:
12608 return default_use_by_pieces_infrastructure_p (size, align,
12609 op, speed_p);
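/* Rough worked example (the piece sizes depend on MOVE_MAX_PIECES): copying
   16 bytes with 32-bit alignment needs about four SImode pieces; when
   optimizing for speed that is below the limit of 16 insns, so the copy is
   expanded inline, whereas at -Os the limit of 2 insns is exceeded and a
   library call is emitted instead.  */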
12613 bool
12614 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12615 rtx x ATTRIBUTE_UNUSED)
12617 return TARGET_FDPIC;
12620 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12621 function descriptor) into r1 and the GOT address into r12,
12622 returning an rtx for r1. */
12625 sh_load_function_descriptor (rtx funcdesc)
12627 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12628 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12629 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12630 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12632 emit_move_insn (r1, fnaddr);
12633 /* The ABI requires the entry point address to be loaded first, so
12634 prevent the load from being moved after that of the GOT
12635 address. */
12636 emit_insn (gen_blockage ());
12637 emit_move_insn (pic_reg, gotaddr);
12638 return r1;
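/* Illustrative layout, as implied by the loads above: an FDPIC function
   descriptor is two words, the function entry point at offset 0 and the
   callee's GOT/FDPIC pointer at offset 4.  A call through FUNCDESC
   therefore loads r1 from the first word and r12 from the second, and the
   caller then jumps through r1.  */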
12641 /* Return an rtx holding the initial value of the FDPIC register (the
12642 FDPIC pointer passed in from the caller). */
12645 sh_get_fdpic_reg_initial_val (void)
12647 return get_hard_reg_initial_val (Pmode, PIC_REG);
12650 #include "gt-sh.h"