PR c/81544 - attribute noreturn and warn_unused_result on the same function accepted
[official-gcc.git] / gcc / config / sh / sh.c
blob 0d7d7bc53ca21fdec925feddd67247b548e51d62
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2017 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
24 #include "config.h"
25 #define INCLUDE_VECTOR
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "gimple.h"
33 #include "cfghooks.h"
34 #include "df.h"
35 #include "memmodel.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "diagnostic-core.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "stor-layout.h"
46 #include "calls.h"
47 #include "varasm.h"
48 #include "flags.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "reload.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "dwarf2.h"
55 #include "langhooks.h"
56 #include "cfgrtl.h"
57 #include "intl.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "tm-constrs.h"
61 #include "opts.h"
62 #include "tree-pass.h"
63 #include "context.h"
64 #include "builtins.h"
65 #include "rtl-iter.h"
66 #include "regs.h"
68 /* This file should be included last. */
69 #include "target-def.h"
71 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
73 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
74 #define GEN_MOV (*(gen_movsi))
75 #define GEN_ADD3 (*(gen_addsi3))
76 #define GEN_SUB3 (*(gen_subsi3))
78 /* Used to simplify the logic below. Find the attributes wherever
79 they may be. */
80 #define SH_ATTRIBUTES(decl) \
81 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
82 : DECL_ATTRIBUTES (decl) \
83 ? (DECL_ATTRIBUTES (decl)) \
84 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
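/* Added illustrative note (not in the original file): SH_ATTRIBUTES resolves
   as follows -- for a type node it yields TYPE_ATTRIBUTES (decl); for a decl
   that carries its own attribute list it yields DECL_ATTRIBUTES (decl);
   otherwise it falls back to the attributes of the decl's type,
   TYPE_ATTRIBUTES (TREE_TYPE (decl)).  */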
86 /* Set to true by expand_prologue() when the function is an
87 interrupt handler. */
88 bool current_function_interrupt;
90 tree sh_deferred_function_attributes;
91 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
93 /* Global variables for machine-dependent things. */
95 /* Which cpu are we scheduling for. */
96 enum processor_type sh_cpu;
98 /* Definitions used in ready queue reordering for first scheduling pass. */
100 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
101 static short *regmode_weight[2];
103 /* Total SFmode and SImode weights of scheduled insns. */
104 static int curr_regmode_pressure[2];
106 /* Number of r0 life regions. */
107 static int r0_life_regions;
109 /* If true, skip cycles for Q -> R movement. */
110 static int skip_cycles = 0;
112 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
113 and returned from sh_reorder2. */
114 static short cached_can_issue_more;
116 /* Unique number for UNSPEC_BBR pattern. */
117 static unsigned int unspec_bbr_uid = 1;
119 /* Provides the class number of the smallest class containing
120 reg number. */
121 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
123 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
156 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
157 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
158 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
159 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
160 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
161 GENERAL_REGS, GENERAL_REGS,
164 char sh_register_names[FIRST_PSEUDO_REGISTER] \
165 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
167 char sh_additional_register_names[ADDREGNAMES_SIZE] \
168 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
169 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
171 int assembler_dialect;
173 static void split_branches (rtx_insn *);
174 static int branch_dest (rtx);
175 static void print_slot (rtx_sequence *);
176 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
177 static void dump_table (rtx_insn *, rtx_insn *);
178 static bool broken_move (rtx_insn *);
179 static bool mova_p (rtx_insn *);
180 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
181 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
182 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
183 static void sh_reorg (void);
184 static void sh_option_override (void);
185 static void sh_override_options_after_change (void);
186 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
187 static rtx_insn* emit_frame_insn (rtx);
188 static rtx push (int);
189 static void pop (int);
190 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
191 static int calc_live_regs (HARD_REG_SET *);
192 static HOST_WIDE_INT rounded_frame_size (int);
193 static bool sh_frame_pointer_required (void);
194 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
195 static int sh_mode_needed (int, rtx_insn *);
196 static int sh_mode_after (int, int, rtx_insn *);
197 static int sh_mode_entry (int);
198 static int sh_mode_exit (int);
199 static int sh_mode_priority (int entity, int n);
201 static rtx mark_constant_pool_use (rtx);
202 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
203 int, bool *);
204 static tree sh_handle_resbank_handler_attribute (tree *, tree,
205 tree, int, bool *);
206 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
207 tree, int, bool *);
208 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
209 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
210 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
211 static void sh_print_operand (FILE *, rtx, int);
212 static void sh_print_operand_address (FILE *, machine_mode, rtx);
213 static bool sh_print_operand_punct_valid_p (unsigned char code);
214 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
215 static void sh_output_function_epilogue (FILE *);
216 static void sh_insert_attributes (tree, tree *);
217 static const char *sh_check_pch_target_flags (int);
218 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
219 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
220 static int sh_issue_rate (void);
221 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
222 static short find_set_regmode_weight (rtx, machine_mode);
223 static short find_insn_regmode_weight (rtx, machine_mode);
224 static void find_regmode_weight (basic_block, machine_mode);
225 static int find_r0_life_regions (basic_block);
226 static void sh_md_init_global (FILE *, int, int);
227 static void sh_md_finish_global (FILE *, int);
228 static int rank_for_reorder (const void *, const void *);
229 static void swap_reorder (rtx_insn **, int);
230 static void ready_reorder (rtx_insn **, int);
231 static bool high_pressure (machine_mode);
232 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
233 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
234 static void sh_md_init (FILE *, int, int);
235 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
237 static bool sh_function_ok_for_sibcall (tree, tree);
239 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
240 static bool sh_ms_bitfield_layout_p (const_tree);
242 static void sh_init_builtins (void);
243 static tree sh_builtin_decl (unsigned, bool);
244 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
245 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
246 HOST_WIDE_INT, tree);
247 static void sh_file_start (void);
248 static bool sh_assemble_integer (rtx, unsigned int, int);
249 static bool flow_dependent_p (rtx, rtx);
250 static void flow_dependent_p_1 (rtx, const_rtx, void *);
251 static int shiftcosts (rtx);
252 static int and_xor_ior_costs (rtx, int);
253 static int addsubcosts (rtx);
254 static int multcosts (rtx);
255 static bool unspec_caller_rtx_p (rtx);
256 static bool sh_cannot_copy_insn_p (rtx_insn *);
257 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
258 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
259 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
260 static int sh_pr_n_sets (void);
261 static rtx sh_allocate_initial_value (rtx);
262 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
263 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
264 machine_mode,
265 struct secondary_reload_info *);
266 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
267 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
268 static rtx sh_delegitimize_address (rtx);
269 static bool sh_cannot_substitute_mem_equiv_p (rtx);
270 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
271 static int scavenge_reg (HARD_REG_SET *s);
273 static rtx sh_struct_value_rtx (tree, int);
274 static rtx sh_function_value (const_tree, const_tree, bool);
275 static bool sh_function_value_regno_p (const unsigned int);
276 static rtx sh_libcall_value (machine_mode, const_rtx);
277 static bool sh_return_in_memory (const_tree, const_tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
280 tree, int *, int);
281 static bool sh_strict_argument_naming (cumulative_args_t);
282 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
283 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
284 static tree sh_build_builtin_va_list (void);
285 static void sh_va_start (tree, rtx);
286 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
287 static bool sh_promote_prototypes (const_tree);
288 static machine_mode sh_promote_function_mode (const_tree type,
289 machine_mode,
290 int *punsignedp,
291 const_tree funtype,
292 int for_return);
293 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
294 const_tree, bool);
295 static bool sh_callee_copies (cumulative_args_t, machine_mode,
296 const_tree, bool);
297 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
298 tree, bool);
299 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
300 const_tree, bool);
301 static rtx sh_function_arg (cumulative_args_t, machine_mode,
302 const_tree, bool);
303 static int sh_dwarf_calling_convention (const_tree);
304 static void sh_encode_section_info (tree, rtx, int);
305 static bool sh2a_function_vector_p (tree);
306 static void sh_trampoline_init (rtx, tree, rtx);
307 static rtx sh_trampoline_adjust_address (rtx);
308 static void sh_conditional_register_usage (void);
309 static bool sh_legitimate_constant_p (machine_mode, rtx);
310 static int mov_insn_size (machine_mode, bool);
311 static int mov_insn_alignment_mask (machine_mode, bool);
312 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
313 unsigned int,
314 enum by_pieces_operation,
315 bool);
316 static bool sequence_insn_p (rtx_insn *);
317 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
318 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
319 machine_mode, bool);
320 static bool sh_legitimate_combined_insn (rtx_insn* insn);
322 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
324 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
325 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
326 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
327 static bool sh_modes_tieable_p (machine_mode, machine_mode);
328 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
330 static const struct attribute_spec sh_attribute_table[] =
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity, exclusions } */
334 { "interrupt_handler", 0, 0, true, false, false,
335 sh_handle_interrupt_handler_attribute, false, NULL },
336 { "sp_switch", 1, 1, true, false, false,
337 sh_handle_sp_switch_attribute, false, NULL },
338 { "trap_exit", 1, 1, true, false, false,
339 sh_handle_trap_exit_attribute, false, NULL },
340 { "renesas", 0, 0, false, true, false,
341 sh_handle_renesas_attribute, false, NULL },
342 { "trapa_handler", 0, 0, true, false, false,
343 sh_handle_interrupt_handler_attribute, false, NULL },
344 { "nosave_low_regs", 0, 0, true, false, false,
345 sh_handle_interrupt_handler_attribute, false, NULL },
346 { "resbank", 0, 0, true, false, false,
347 sh_handle_resbank_handler_attribute, false, NULL },
348 { "function_vector", 1, 1, true, false, false,
349 sh2a_handle_function_vector_handler_attribute, false, NULL },
350 { NULL, 0, 0, false, false, false, NULL, false, NULL }
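/* Hedged usage sketch (added; the argument values are made up, but the
   attribute names and arities come from the table above):

     void __attribute__ ((interrupt_handler)) isr (void);
     void __attribute__ ((trap_exit (4))) trap_isr (void);
     void __attribute__ ((sp_switch ("alt_stack"))) nmi_isr (void);

   interrupt_handler takes no argument, while sp_switch and trap_exit each
   take exactly one, matching their min_len/max_len entries.  */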
353 /* Initialize the GCC target structure. */
354 #undef TARGET_ATTRIBUTE_TABLE
355 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
357 /* The next two are used for debug info when compiling with -gdwarf. */
358 #undef TARGET_ASM_UNALIGNED_HI_OP
359 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
360 #undef TARGET_ASM_UNALIGNED_SI_OP
361 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
363 #undef TARGET_OPTION_OVERRIDE
364 #define TARGET_OPTION_OVERRIDE sh_option_override
366 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
367 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
368 sh_override_options_after_change
370 #undef TARGET_PRINT_OPERAND
371 #define TARGET_PRINT_OPERAND sh_print_operand
372 #undef TARGET_PRINT_OPERAND_ADDRESS
373 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
374 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
375 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
376 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
377 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
379 #undef TARGET_ASM_FUNCTION_EPILOGUE
380 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
382 #undef TARGET_ASM_OUTPUT_MI_THUNK
383 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
385 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
386 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
387 hook_bool_const_tree_hwi_hwi_const_tree_true
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START sh_file_start
391 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
392 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
394 #undef TARGET_ASM_INTEGER
395 #define TARGET_ASM_INTEGER sh_assemble_integer
397 #undef TARGET_REGISTER_MOVE_COST
398 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
400 #undef TARGET_INSERT_ATTRIBUTES
401 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
403 #undef TARGET_SCHED_ADJUST_COST
404 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
406 #undef TARGET_SCHED_ISSUE_RATE
407 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
409 /* The next 5 hooks have been implemented for reenabling sched1. With the
410 help of these macros we are limiting the movement of insns in sched1 to
411 reduce the register pressure. The overall idea is to keep count of SImode
412 and SFmode regs required by already scheduled insns. When these counts
413 cross some threshold values, give priority to insns that free registers.
414 The insn that frees registers is most likely to be the insn with the lowest
415 LUID (original insn order), but such an insn might be sitting in the stalled
416 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
417 up to a max of 8 cycles so that such insns may move from Q -> R.
419 The hooks are described below:
421 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
422 scheduler; it is called inside the sched_init function just after the
423 find_insn_reg_weights function call. It is used to calculate the SImode
424 and SFmode weights of insns of basic blocks, much like what
425 find_insn_reg_weights does.
426 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
428 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
429 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
430 (Q)->(R).
432 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
433 high, reorder the ready queue so that the insn with the lowest LUID will be
434 issued next.
436 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
437 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
439 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
440 can be returned from TARGET_SCHED_REORDER2.
442 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
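/* Added sketch of the resulting reorder decision (illustrative only; the real
   hook bodies appear further below in this file):

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);   // issue lowest-LUID insns first

   high_pressure () and ready_reorder () are the functions declared above;
   "ready" / "n_ready" stand for the scheduler's ready-queue arguments.  */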
444 #undef TARGET_SCHED_DFA_NEW_CYCLE
445 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
447 #undef TARGET_SCHED_INIT_GLOBAL
448 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
450 #undef TARGET_SCHED_FINISH_GLOBAL
451 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
453 #undef TARGET_SCHED_VARIABLE_ISSUE
454 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
456 #undef TARGET_SCHED_REORDER
457 #define TARGET_SCHED_REORDER sh_reorder
459 #undef TARGET_SCHED_REORDER2
460 #define TARGET_SCHED_REORDER2 sh_reorder2
462 #undef TARGET_SCHED_INIT
463 #define TARGET_SCHED_INIT sh_md_init
465 #undef TARGET_DELEGITIMIZE_ADDRESS
466 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
468 #undef TARGET_LEGITIMIZE_ADDRESS
469 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
471 #undef TARGET_CAN_FOLLOW_JUMP
472 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
474 #undef TARGET_MS_BITFIELD_LAYOUT_P
475 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
477 #undef TARGET_INIT_BUILTINS
478 #define TARGET_INIT_BUILTINS sh_init_builtins
479 #undef TARGET_BUILTIN_DECL
480 #define TARGET_BUILTIN_DECL sh_builtin_decl
481 #undef TARGET_EXPAND_BUILTIN
482 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
484 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
485 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
487 #undef TARGET_CANNOT_COPY_INSN_P
488 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
489 #undef TARGET_RTX_COSTS
490 #define TARGET_RTX_COSTS sh_rtx_costs
491 #undef TARGET_ADDRESS_COST
492 #define TARGET_ADDRESS_COST sh_address_cost
493 #undef TARGET_ALLOCATE_INITIAL_VALUE
494 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
496 #undef TARGET_MACHINE_DEPENDENT_REORG
497 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
499 #undef TARGET_DWARF_REGISTER_SPAN
500 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
502 #ifdef HAVE_AS_TLS
503 #undef TARGET_HAVE_TLS
504 #define TARGET_HAVE_TLS true
505 #endif
507 #undef TARGET_PROMOTE_PROTOTYPES
508 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
509 #undef TARGET_PROMOTE_FUNCTION_MODE
510 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
512 #undef TARGET_FUNCTION_VALUE
513 #define TARGET_FUNCTION_VALUE sh_function_value
514 #undef TARGET_FUNCTION_VALUE_REGNO_P
515 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
516 #undef TARGET_LIBCALL_VALUE
517 #define TARGET_LIBCALL_VALUE sh_libcall_value
518 #undef TARGET_STRUCT_VALUE_RTX
519 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
520 #undef TARGET_RETURN_IN_MEMORY
521 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
523 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
524 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
525 #undef TARGET_SETUP_INCOMING_VARARGS
526 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
527 #undef TARGET_STRICT_ARGUMENT_NAMING
528 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
529 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
530 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
531 #undef TARGET_MUST_PASS_IN_STACK
532 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
535 #undef TARGET_CALLEE_COPIES
536 #define TARGET_CALLEE_COPIES sh_callee_copies
537 #undef TARGET_ARG_PARTIAL_BYTES
538 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
539 #undef TARGET_FUNCTION_ARG
540 #define TARGET_FUNCTION_ARG sh_function_arg
541 #undef TARGET_FUNCTION_ARG_ADVANCE
542 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
544 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
545 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
547 #undef TARGET_BUILD_BUILTIN_VA_LIST
548 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
549 #undef TARGET_EXPAND_BUILTIN_VA_START
550 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
551 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
552 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
554 #undef TARGET_VECTOR_MODE_SUPPORTED_P
555 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
557 #undef TARGET_CHECK_PCH_TARGET_FLAGS
558 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
560 #undef TARGET_DWARF_CALLING_CONVENTION
561 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
563 #undef TARGET_FRAME_POINTER_REQUIRED
564 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
566 #undef TARGET_MODE_EMIT
567 #define TARGET_MODE_EMIT sh_emit_mode_set
569 #undef TARGET_MODE_NEEDED
570 #define TARGET_MODE_NEEDED sh_mode_needed
572 #undef TARGET_MODE_AFTER
573 #define TARGET_MODE_AFTER sh_mode_after
575 #undef TARGET_MODE_ENTRY
576 #define TARGET_MODE_ENTRY sh_mode_entry
578 #undef TARGET_MODE_EXIT
579 #define TARGET_MODE_EXIT sh_mode_exit
581 #undef TARGET_MODE_PRIORITY
582 #define TARGET_MODE_PRIORITY sh_mode_priority
584 /* Return regmode weight for insn. */
585 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
586 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
588 /* Return current register pressure for regmode. */
589 #define CURR_REGMODE_PRESSURE(MODE)\
590 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
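/* Added note: in both macros above, array index 0 tracks SImode and index 1
   tracks SFmode, matching the two-element regmode_weight[] and
   curr_regmode_pressure[] declarations near the top of the file.  */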
592 #undef TARGET_ENCODE_SECTION_INFO
593 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
595 #undef TARGET_LRA_P
596 #define TARGET_LRA_P sh_lra_p
598 #undef TARGET_SECONDARY_RELOAD
599 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
601 #undef TARGET_PREFERRED_RELOAD_CLASS
602 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
607 #undef TARGET_LEGITIMATE_ADDRESS_P
608 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
610 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
611 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
613 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
614 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
615 sh_legitimize_address_displacement
617 #undef TARGET_TRAMPOLINE_INIT
618 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
619 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
620 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
622 #undef TARGET_LEGITIMATE_CONSTANT_P
623 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
625 #undef TARGET_CANONICALIZE_COMPARISON
626 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
628 #undef TARGET_LEGITIMATE_COMBINED_INSN
629 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
631 #undef TARGET_FIXED_CONDITION_CODE_REGS
632 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
634 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
635 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
636 sh_use_by_pieces_infrastructure_p
638 /* Machine-specific symbol_ref flags. */
639 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
641 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
642 is used by optabs.c atomic op expansion code as well as in sync.md. */
643 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
644 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
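/* Added note (based on the general meaning of this hook, not on SH-specific
   documentation): declaring 0x80 here lets the optabs expansion know that a
   successful __atomic_test_and_set leaves 0x80 in the byte rather than 1, so
   the result does not need to be normalized before being compared against
   the "set" value.  */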
646 #undef TARGET_CANNOT_FORCE_CONST_MEM
647 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
649 #undef TARGET_HARD_REGNO_NREGS
650 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
651 #undef TARGET_HARD_REGNO_MODE_OK
652 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
654 #undef TARGET_MODES_TIEABLE_P
655 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
657 #undef TARGET_CAN_CHANGE_MODE_CLASS
658 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
660 #undef TARGET_CONSTANT_ALIGNMENT
661 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
663 struct gcc_target targetm = TARGET_INITIALIZER;
666 /* Information on the currently selected atomic model.
667 This is initialized in sh_option_override. */
668 static sh_atomic_model selected_atomic_model_;
670 const sh_atomic_model&
671 selected_atomic_model (void)
673 return selected_atomic_model_;
676 static sh_atomic_model
677 parse_validate_atomic_model_option (const char* str)
679 const char* model_names[sh_atomic_model::num_models];
680 model_names[sh_atomic_model::none] = "none";
681 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
682 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
683 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
684 model_names[sh_atomic_model::soft_imask] = "soft-imask";
686 const char* model_cdef_names[sh_atomic_model::num_models];
687 model_cdef_names[sh_atomic_model::none] = "NONE";
688 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
689 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
690 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
691 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
693 sh_atomic_model ret;
694 ret.type = sh_atomic_model::none;
695 ret.name = model_names[sh_atomic_model::none];
696 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
697 ret.strict = false;
698 ret.tcb_gbr_offset = -1;
700 /* Handle empty string as 'none'. */
701 if (str == NULL || *str == '\0')
702 return ret;
704 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
706 std::vector<std::string> tokens;
707 for (std::stringstream ss (str); ss.good (); )
709 tokens.push_back (std::string ());
710 std::getline (ss, tokens.back (), ',');
713 if (tokens.empty ())
714 err_ret ("invalid atomic model option");
716 /* The first token must be the atomic model name. */
718 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
719 if (tokens.front () == model_names[i])
721 ret.type = (sh_atomic_model::enum_type)i;
722 ret.name = model_names[i];
723 ret.cdef_name = model_cdef_names[i];
724 goto got_mode_name;
727 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
728 got_mode_name:;
731 /* Go through the remaining tokens. */
732 for (size_t i = 1; i < tokens.size (); ++i)
734 if (tokens[i] == "strict")
735 ret.strict = true;
736 else if (tokens[i].find ("gbr-offset=") == 0)
738 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
739 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
740 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
741 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
742 "option", offset_str.c_str ());
744 else
745 err_ret ("unknown parameter \"%s\" in atomic model option",
746 tokens[i].c_str ());
749 /* Check that the selection makes sense. */
750 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
751 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
752 ret.name);
754 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
755 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
757 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
758 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
760 if (ret.type == sh_atomic_model::soft_tcb
761 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
762 || (ret.tcb_gbr_offset & 3) != 0))
763 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
764 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
765 ret.name);
767 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
768 err_ret ("cannot use atomic model %s in user mode", ret.name);
770 return ret;
772 #undef err_ret
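/* Added example of how the parser above behaves (the command-line spelling of
   the option is an assumption; only sh_atomic_model_str is visible in this
   file): a string such as

     "soft-tcb,gbr-offset=32,strict"

   yields ret.type == sh_atomic_model::soft_tcb, ret.strict == true and
   ret.tcb_gbr_offset == 32, which passes the checks above since 32 is a
   multiple of 4 within 0-1020.  */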
775 /* Register SH specific RTL passes. */
776 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
777 const char* name);
778 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
779 const char* name);
780 static void
781 register_sh_passes (void)
783 /* Running the sh_treg_combine pass after ce1 generates better code when
784 comparisons are combined and reg-reg moves are introduced, because
785 reg-reg moves will be eliminated afterwards. However, there are quite
786 a few cases where combine will be unable to fold comparison related insns,
787 thus for now don't do it.
788 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
789 PASS_POS_INSERT_AFTER, "ce1", 1);
790 */
792 /* Run sh_treg_combine pass after combine but before register allocation. */
793 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
794 PASS_POS_INSERT_AFTER, "split1", 1);
796 /* Run sh_treg_combine pass after register allocation and basic block
797 reordering as this sometimes creates new opportunities. */
798 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
799 PASS_POS_INSERT_AFTER, "split4", 1);
801 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
802 is known after a conditional branch.
803 This must be done after basic blocks and branch conditions have
804 stabilized and won't be changed by further passes. */
805 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
806 PASS_POS_INSERT_BEFORE, "sched2", 1);
809 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
810 various options, and do some machine dependent initialization. */
811 static void
812 sh_option_override (void)
814 int regno;
816 SUBTARGET_OVERRIDE_OPTIONS;
818 sh_cpu = PROCESSOR_SH1;
819 assembler_dialect = 0;
820 if (TARGET_SH2)
821 sh_cpu = PROCESSOR_SH2;
822 if (TARGET_SH2E)
823 sh_cpu = PROCESSOR_SH2E;
824 if (TARGET_SH2A)
825 sh_cpu = PROCESSOR_SH2A;
826 if (TARGET_SH3)
827 sh_cpu = PROCESSOR_SH3;
828 if (TARGET_SH3E)
829 sh_cpu = PROCESSOR_SH3E;
830 if (TARGET_SH4)
832 assembler_dialect = 1;
833 sh_cpu = PROCESSOR_SH4;
835 if (TARGET_SH4A)
837 assembler_dialect = 1;
838 sh_cpu = PROCESSOR_SH4A;
841 /* User/privileged mode is supported only on SH3* and SH4*.
842 Disable it for everything else. */
843 if (!TARGET_SH3 && TARGET_USERMODE)
844 TARGET_USERMODE = false;
846 if (! strcmp (sh_div_str, "call-div1"))
847 sh_div_strategy = SH_DIV_CALL_DIV1;
848 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
849 sh_div_strategy = SH_DIV_CALL_FP;
850 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
851 sh_div_strategy = SH_DIV_CALL_TABLE;
852 else
854 /* Pick one that makes most sense for the target in general.
855 It is not much good to use different functions depending on -Os,
856 since then we'll end up with two different functions when some of
857 the code is compiled for size, and some for speed. */
859 /* SH4 tends to emphasize speed. */
860 if (TARGET_HARD_SH4)
861 sh_div_strategy = SH_DIV_CALL_TABLE;
862 /* These have their own way of doing things. */
863 else if (TARGET_SH2A)
864 sh_div_strategy = SH_DIV_INTRINSIC;
865 /* SH1 .. SH3 cores often go into small-footprint systems, so
866 default to the smallest implementation available. */
867 else
868 sh_div_strategy = SH_DIV_CALL_DIV1;
871 if (sh_divsi3_libfunc[0])
872 ; /* User supplied - leave it alone. */
873 else if (TARGET_DIVIDE_CALL_FP)
874 sh_divsi3_libfunc = "__sdivsi3_i4";
875 else if (TARGET_DIVIDE_CALL_TABLE)
876 sh_divsi3_libfunc = "__sdivsi3_i4i";
877 else
878 sh_divsi3_libfunc = "__sdivsi3";
880 if (sh_branch_cost == -1)
882 /* The SH1 does not have delay slots, hence we get a pipeline stall
883 at every branch. The SH4 is superscalar, so the single delay slot
884 is not sufficient to keep both pipelines filled.
885 In any case, set the default branch cost to '2', as it results in
886 slightly overall smaller code and also enables some if conversions
887 that are required for matching special T bit related insns. */
888 sh_branch_cost = 2;
891 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
892 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
893 TARGET_ZDCBRANCH = 1;
895 /* FDPIC code is a special form of PIC, and the vast majority of code
896 generation constraints that apply to PIC also apply to FDPIC, so we
897 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
898 flag_pic is checked. */
899 if (TARGET_FDPIC && !flag_pic)
900 flag_pic = 2;
902 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
903 if (! VALID_REGISTER_P (regno))
904 sh_register_names[regno][0] = '\0';
906 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
907 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
908 sh_additional_register_names[regno][0] = '\0';
910 if (flag_pic && ! TARGET_PREFERGOT)
911 flag_no_function_cse = 1;
913 if (targetm.small_register_classes_for_mode_p (VOIDmode))
915 /* Never run scheduling before reload, since that can
916 break global alloc, and generates slower code anyway due
917 to the pressure on R0. */
918 /* Enable sched1 for SH4 if the user explicitly requests it.
919 When sched1 is enabled, the ready queue will be reordered by
920 the target hooks if pressure is high. We cannot do this for
921 PIC, SH3 and lower as they give spill failures for R0. */
922 if (!TARGET_HARD_SH4 || flag_pic)
923 flag_schedule_insns = 0;
924 /* ??? Current exception handling places basic block boundaries
925 after call_insns. This causes high pressure on R0 and gives
926 spill failures for R0 in reload. See PR 22553 and the thread
927 on gcc-patches
928 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
929 else if (flag_exceptions)
931 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
932 warning (0, "ignoring -fschedule-insns because of exception "
933 "handling bug");
934 flag_schedule_insns = 0;
936 else if (flag_schedule_insns
937 && !global_options_set.x_flag_schedule_insns)
938 flag_schedule_insns = 0;
941 /* Unwind info is not correct around the CFG unless either a frame
942 pointer is present or M_A_O_A is set. Fixing this requires rewriting
943 unwind info generation to be aware of the CFG and propagating states
944 around edges. */
945 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
946 || flag_exceptions || flag_non_call_exceptions)
947 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
949 warning (0, "unwind tables currently require either a frame pointer "
950 "or -maccumulate-outgoing-args for correctness");
951 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
954 if (flag_unsafe_math_optimizations)
956 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
957 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
958 TARGET_FSCA = 1;
960 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
961 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
962 TARGET_FSRRA = 1;
965 /* Allow fsrra insn only if -funsafe-math-optimizations and
966 -ffinite-math-only is enabled. */
967 TARGET_FSRRA = TARGET_FSRRA
968 && flag_unsafe_math_optimizations
969 && flag_finite_math_only;
971 /* If the -mieee option was not explicitly set by the user, turn it on
972 unless -ffinite-math-only was specified. See also PR 33135. */
973 if (! global_options_set.x_TARGET_IEEE)
974 TARGET_IEEE = ! flag_finite_math_only;
976 if (sh_fixed_range_str)
977 sh_fix_range (sh_fixed_range_str);
979 /* This target defaults to strict volatile bitfields. */
980 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
981 flag_strict_volatile_bitfields = 1;
983 sh_override_options_after_change ();
985 /* Parse atomic model option and make sure it is valid for the current
986 target CPU. */
987 selected_atomic_model_
988 = parse_validate_atomic_model_option (sh_atomic_model_str);
990 register_sh_passes ();
993 /* Implement targetm.override_options_after_change. */
995 static void
996 sh_override_options_after_change (void)
998 /* Adjust loop, jump and function alignment values (in bytes), if those
999 were not specified by the user using -falign-loops, -falign-jumps
1000 and -falign-functions options.
1001 32 bit alignment is better for speed, because instructions can be
1002 fetched as a pair from a longword boundary. For size use 16 bit
1003 alignment to get more compact code.
1004 Aligning all jumps increases the code size, even if it might
1005 result in slightly faster code. Thus, it is set to the smallest
1006 alignment possible if not specified by the user. */
1007 if (align_loops == 0)
1008 align_loops = optimize_size ? 2 : 4;
1010 if (align_jumps == 0)
1011 align_jumps = 2;
1012 else if (align_jumps < 2)
1013 align_jumps = 2;
1015 if (align_functions == 0)
1016 align_functions = optimize_size ? 2 : 4;
1018 /* The linker relaxation code breaks when a function contains
1019 alignments that are larger than that at the start of a
1020 compilation unit. */
1021 if (TARGET_RELAX)
1023 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1025 /* Also take possible .long constants / mova tables into account. */
1026 if (min_align < 4)
1027 min_align = 4;
1028 if (align_functions < min_align)
1029 align_functions = min_align;
1033 /* Print the operand address in x to the stream. */
1034 static void
1035 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1037 switch (GET_CODE (x))
1039 case REG:
1040 case SUBREG:
1041 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1042 break;
1044 case PLUS:
1046 rtx base = XEXP (x, 0);
1047 rtx index = XEXP (x, 1);
1049 switch (GET_CODE (index))
1051 case CONST_INT:
1052 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1053 reg_names[true_regnum (base)]);
1054 break;
1056 case REG:
1057 case SUBREG:
1059 int base_num = true_regnum (base);
1060 int index_num = true_regnum (index);
1062 /* If base or index is R0, make sure that it comes first.
1063 Usually one of them will be R0, but the order might be wrong.
1064 If neither base nor index is R0, it's an error and we just
1065 pass it on to the assembler. This avoids silent wrong code
1066 bugs. */
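/* Added example: whether R0 arrives as the base or as the index operand, the
   swap below makes the output "@(r0,r4)" for a pair of r0/r4 operands,
   matching the assembler's @(R0,Rn) form.  */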
1067 if (base_num == 0 && index_num != 0)
1068 std::swap (base_num, index_num);
1070 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1071 reg_names[base_num]);
1072 break;
1075 default:
1076 gcc_unreachable ();
1079 break;
1081 case PRE_DEC:
1082 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1083 break;
1085 case POST_INC:
1086 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1087 break;
1089 default:
1090 x = mark_constant_pool_use (x);
1091 output_addr_const (stream, x);
1092 break;
1096 /* Print operand x (an rtx) in assembler syntax to file stream
1097 according to modifier code.
1099 '.' print a .s if insn needs delay slot
1100 ',' print LOCAL_LABEL_PREFIX
1101 '@' print trap, rte or rts depending upon pragma interruptness
1102 '#' output a nop if there is nothing to put in the delay slot
1103 ''' print likelihood suffix (/u for unlikely).
1104 '>' print branch target if -fverbose-asm
1105 'O' print a constant without the #
1106 'R' print the LSW of a dp value - changes if in little endian
1107 'S' print the MSW of a dp value - changes if in little endian
1108 'T' print the next word of a dp value - same as 'R' in big endian mode.
1109 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1110 'N' print 'r63' if the operand is (const_int 0).
1111 'd' print a V2SF reg as dN instead of fpN.
1112 'm' print a pair `base,offset' or `base,index', for LD and ST.
1113 'U' Likewise for {LD,ST}{HI,LO}.
1114 'V' print the position of a single bit set.
1115 'W' print the position of a single bit cleared.
1116 't' print a memory address which is a register.
1117 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1118 'o' output an operator. */
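/* Added example (operand shapes assumed, not taken from sh.md): for a SImode
   MEM operand whose address is a plain register r4, '%M' prints ".l" and
   '%m' prints "r4, 0", per the handling below.  */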
1119 static void
1120 sh_print_operand (FILE *stream, rtx x, int code)
1122 int regno;
1123 machine_mode mode;
1125 switch (code)
1127 tree trapa_attr;
1129 case '.':
1130 if (final_sequence
1131 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1132 && get_attr_length (final_sequence->insn (1)))
1133 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1134 break;
1135 case ',':
1136 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1137 break;
1138 case '@':
1139 trapa_attr = lookup_attribute ("trap_exit",
1140 DECL_ATTRIBUTES (current_function_decl));
1141 if (trapa_attr)
1142 fprintf (stream, "trapa #%ld",
1143 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1144 else if (sh_cfun_interrupt_handler_p ())
1146 if (sh_cfun_resbank_handler_p ())
1147 fprintf (stream, "resbank\n");
1148 fprintf (stream, "rte");
1150 else
1151 fprintf (stream, "rts");
1152 break;
1153 case '#':
1154 /* Output a nop if there's nothing in the delay slot. */
1155 if (dbr_sequence_length () == 0)
1156 fprintf (stream, "\n\tnop");
1157 break;
1158 case '\'':
1160 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1162 if (note
1163 && profile_probability::from_reg_br_prob_note (XINT (note, 0))
1164 < profile_probability::even ())
1165 fputs ("/u", stream);
1166 break;
1168 case '>':
1169 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1171 fputs ("\t! target: ", stream);
1172 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1174 break;
1175 case 'O':
1176 x = mark_constant_pool_use (x);
1177 output_addr_const (stream, x);
1178 break;
1179 /* N.B.: %R / %S / %T adjust memory addresses by four.
1180 While they can be used to access 64 bit parts of a larger value
1181 held in general purpose registers, that won't work with memory -
1182 neither for fp registers, since the frxx names are used. */
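/* Added note: for a DImode value held in a general register pair such as
   r2/r3, '%S' prints the register holding the most significant word and '%R'
   the one holding the least significant word; which of the two that is
   depends on endianness via SH_REG_MSW_OFFSET / SH_REG_LSW_OFFSET.  */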
1183 case 'R':
1184 if (REG_P (x) || GET_CODE (x) == SUBREG)
1186 regno = true_regnum (x);
1187 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1188 fputs (reg_names[regno], (stream));
1190 else if (MEM_P (x))
1192 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1193 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1195 else
1197 rtx sub = NULL_RTX;
1199 mode = GET_MODE (x);
1200 if (mode == VOIDmode)
1201 mode = DImode;
1202 if (GET_MODE_SIZE (mode) >= 8)
1203 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1204 if (sub)
1205 sh_print_operand (stream, sub, 0);
1206 else
1207 output_operand_lossage ("invalid operand to %%R");
1209 break;
1210 case 'S':
1211 if (REG_P (x) || GET_CODE (x) == SUBREG)
1213 regno = true_regnum (x);
1214 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1215 fputs (reg_names[regno], (stream));
1217 else if (MEM_P (x))
1219 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1220 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1222 else
1224 rtx sub = NULL_RTX;
1226 mode = GET_MODE (x);
1227 if (mode == VOIDmode)
1228 mode = DImode;
1229 if (GET_MODE_SIZE (mode) >= 8)
1230 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1231 if (sub)
1232 sh_print_operand (stream, sub, 0);
1233 else
1234 output_operand_lossage ("invalid operand to %%S");
1236 break;
1237 case 'T':
1238 /* Next word of a double. */
1239 switch (GET_CODE (x))
1241 case REG:
1242 fputs (reg_names[REGNO (x) + 1], (stream));
1243 break;
1244 case MEM:
1246 machine_mode mode = GET_MODE (x);
1247 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1248 && GET_CODE (XEXP (x, 0)) != POST_INC)
1249 x = adjust_address (x, SImode, 4);
1250 sh_print_operand_address (stream, mode, XEXP (x, 0));
1252 break;
1253 default:
1254 break;
1256 break;
1258 case 't':
1259 gcc_assert (MEM_P (x));
1260 x = XEXP (x, 0);
1261 switch (GET_CODE (x))
1263 case REG:
1264 case SUBREG:
1265 sh_print_operand (stream, x, 0);
1266 break;
1267 default:
1268 break;
1270 break;
1272 case 'o':
1273 switch (GET_CODE (x))
1275 case PLUS: fputs ("add", stream); break;
1276 case MINUS: fputs ("sub", stream); break;
1277 case MULT: fputs ("mul", stream); break;
1278 case DIV: fputs ("div", stream); break;
1279 case EQ: fputs ("eq", stream); break;
1280 case NE: fputs ("ne", stream); break;
1281 case GT: case LT: fputs ("gt", stream); break;
1282 case GE: case LE: fputs ("ge", stream); break;
1283 case GTU: case LTU: fputs ("gtu", stream); break;
1284 case GEU: case LEU: fputs ("geu", stream); break;
1285 default:
1286 break;
1288 break;
1289 case 'M':
1290 if (MEM_P (x))
1292 switch (GET_MODE (x))
1294 case E_QImode: fputs (".b", stream); break;
1295 case E_HImode: fputs (".w", stream); break;
1296 case E_SImode: fputs (".l", stream); break;
1297 case E_SFmode: fputs (".s", stream); break;
1298 case E_DFmode: fputs (".d", stream); break;
1299 default: gcc_unreachable ();
1302 break;
1304 case 'm':
1305 gcc_assert (MEM_P (x));
1306 x = XEXP (x, 0);
1307 /* Fall through. */
1308 case 'U':
1309 switch (GET_CODE (x))
1311 case REG:
1312 case SUBREG:
1313 sh_print_operand (stream, x, 0);
1314 fputs (", 0", stream);
1315 break;
1317 case PLUS:
1318 sh_print_operand (stream, XEXP (x, 0), 0);
1319 fputs (", ", stream);
1320 sh_print_operand (stream, XEXP (x, 1), 0);
1321 break;
1323 default:
1324 gcc_unreachable ();
1326 break;
1328 case 'V':
1330 int num = exact_log2 (INTVAL (x));
1331 gcc_assert (num >= 0);
1332 fprintf (stream, "#%d", num);
1334 break;
1336 case 'W':
1338 int num = exact_log2 (~INTVAL (x));
1339 gcc_assert (num >= 0);
1340 fprintf (stream, "#%d", num);
1342 break;
1344 case 'd':
1345 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1347 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1348 break;
1350 case 'N':
1351 if (x == CONST0_RTX (GET_MODE (x)))
1353 fprintf ((stream), "r63");
1354 break;
1356 goto default_output;
1357 case 'u':
1358 if (CONST_INT_P (x))
1360 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1361 break;
1363 /* Fall through. */
1365 default_output:
1366 default:
1367 regno = 0;
1368 mode = GET_MODE (x);
1370 switch (GET_CODE (x))
1372 case TRUNCATE:
1374 rtx inner = XEXP (x, 0);
1375 int offset = 0;
1376 machine_mode inner_mode;
1378 /* We might see SUBREGs with vector mode registers inside. */
1379 if (GET_CODE (inner) == SUBREG
1380 && (GET_MODE_SIZE (GET_MODE (inner))
1381 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1382 && subreg_lowpart_p (inner))
1383 inner = SUBREG_REG (inner);
1384 if (CONST_INT_P (inner))
1386 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1387 goto default_output;
1389 inner_mode = GET_MODE (inner);
1390 if (GET_CODE (inner) == SUBREG
1391 && (GET_MODE_SIZE (GET_MODE (inner))
1392 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1393 && REG_P (SUBREG_REG (inner)))
1395 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1396 GET_MODE (SUBREG_REG (inner)),
1397 SUBREG_BYTE (inner),
1398 GET_MODE (inner));
1399 inner = SUBREG_REG (inner);
1401 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1402 abort ();
1403 /* Floating point register pairs are always big endian;
1404 general purpose registers are 64 bit wide. */
1405 regno = REGNO (inner);
1406 regno = (hard_regno_nregs (regno, inner_mode)
1407 - hard_regno_nregs (regno, mode))
1408 + offset;
1409 x = inner;
1410 goto reg;
1412 case SIGN_EXTEND:
1413 x = XEXP (x, 0);
1414 goto reg;
1415 case SUBREG:
1416 gcc_assert (SUBREG_BYTE (x) == 0
1417 && REG_P (SUBREG_REG (x)));
1419 x = SUBREG_REG (x);
1420 /* Fall through. */
1422 reg:
1423 case REG:
1424 regno += REGNO (x);
1425 if (FP_REGISTER_P (regno)
1426 && mode == V16SFmode)
1427 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1428 else if (FP_REGISTER_P (REGNO (x))
1429 && mode == V4SFmode)
1430 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1431 else if (REG_P (x)
1432 && mode == V2SFmode)
1433 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1434 else if (FP_REGISTER_P (REGNO (x))
1435 && GET_MODE_SIZE (mode) > 4)
1436 fprintf ((stream), "d%s", reg_names[regno] + 1);
1437 else
1438 fputs (reg_names[regno], (stream));
1439 break;
1441 case MEM:
1442 output_address (GET_MODE (x), XEXP (x, 0));
1443 break;
1445 default:
1446 fputc ('#', stream);
1447 output_addr_const (stream, x);
1448 break;
1450 break;
1454 static bool
1455 sh_print_operand_punct_valid_p (unsigned char code)
1457 return (code == '.' || code == '#' || code == '@' || code == ','
1458 || code == '$' || code == '\'' || code == '>');
1461 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1462 static bool
1463 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1465 if (GET_CODE (x) == UNSPEC)
1467 switch (XINT (x, 1))
1469 case UNSPEC_PIC:
1470 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1471 output_addr_const (file, XVECEXP (x, 0, 0));
1472 break;
1473 case UNSPEC_GOT:
1474 output_addr_const (file, XVECEXP (x, 0, 0));
1475 fputs ("@GOT", file);
1476 break;
1477 case UNSPEC_GOTOFF:
1478 output_addr_const (file, XVECEXP (x, 0, 0));
1479 fputs ("@GOTOFF", file);
1480 break;
1481 case UNSPEC_PLT:
1482 output_addr_const (file, XVECEXP (x, 0, 0));
1483 fputs ("@PLT", file);
1484 break;
1485 case UNSPEC_GOTPLT:
1486 output_addr_const (file, XVECEXP (x, 0, 0));
1487 fputs ("@GOTPLT", file);
1488 break;
1489 case UNSPEC_PCREL:
1490 output_addr_const (file, XVECEXP (x, 0, 0));
1491 fputs ("@PCREL", file);
1492 break;
1493 case UNSPEC_DTPOFF:
1494 output_addr_const (file, XVECEXP (x, 0, 0));
1495 fputs ("@DTPOFF", file);
1496 break;
1497 case UNSPEC_GOTTPOFF:
1498 output_addr_const (file, XVECEXP (x, 0, 0));
1499 fputs ("@GOTTPOFF", file);
1500 break;
1501 case UNSPEC_TPOFF:
1502 output_addr_const (file, XVECEXP (x, 0, 0));
1503 fputs ("@TPOFF", file);
1504 break;
1505 case UNSPEC_CALLER:
1507 char name[32];
1508 /* LPCS stands for Label for PIC Call Site. */
1509 targetm.asm_out.generate_internal_label (name, "LPCS",
1510 INTVAL (XVECEXP (x, 0, 0)));
1511 assemble_name (file, name);
1513 break;
1514 case UNSPEC_SYMOFF:
1515 output_addr_const (file, XVECEXP (x, 0, 0));
1516 fputc ('-', file);
1517 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1519 fputc ('(', file);
1520 output_addr_const (file, XVECEXP (x, 0, 1));
1521 fputc (')', file);
1523 else
1524 output_addr_const (file, XVECEXP (x, 0, 1));
1525 break;
1526 case UNSPEC_PCREL_SYMOFF:
1527 output_addr_const (file, XVECEXP (x, 0, 0));
1528 fputs ("-(", file);
1529 output_addr_const (file, XVECEXP (x, 0, 1));
1530 fputs ("-.)", file);
1531 break;
1532 case UNSPEC_GOTFUNCDESC:
1533 output_addr_const (file, XVECEXP (x, 0, 0));
1534 fputs ("@GOTFUNCDESC", file);
1535 break;
1536 case UNSPEC_GOTOFFFUNCDESC:
1537 output_addr_const (file, XVECEXP (x, 0, 0));
1538 fputs ("@GOTOFFFUNCDESC", file);
1539 break;
1540 default:
1541 return false;
1543 return true;
1545 else
1546 return false;
1549 /* Encode symbol attributes of a SYMBOL_REF into its
1550 SYMBOL_REF_FLAGS. */
1551 static void
1552 sh_encode_section_info (tree decl, rtx rtl, int first)
1554 default_encode_section_info (decl, rtl, first);
1556 if (TREE_CODE (decl) == FUNCTION_DECL
1557 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1558 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1561 /* Prepare operands for a move define_expand; specifically, one of the
1562 operands must be in a register. */
1563 void
1564 prepare_move_operands (rtx operands[], machine_mode mode)
1566 if ((mode == SImode || mode == DImode)
1567 && flag_pic
1568 && ! ((mode == Pmode || mode == ptr_mode)
1569 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1571 rtx temp;
1572 if (SYMBOLIC_CONST_P (operands[1]))
1574 if (MEM_P (operands[0]))
1575 operands[1] = force_reg (Pmode, operands[1]);
1576 else
1578 temp = (!can_create_pseudo_p ()
1579 ? operands[0]
1580 : gen_reg_rtx (Pmode));
1581 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1584 else if (GET_CODE (operands[1]) == CONST
1585 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1586 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1588 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1589 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1590 mode, temp);
1591 operands[1] = expand_binop (mode, add_optab, temp,
1592 XEXP (XEXP (operands[1], 0), 1),
1593 (!can_create_pseudo_p ()
1594 ? temp
1595 : gen_reg_rtx (Pmode)),
1596 0, OPTAB_LIB_WIDEN);
1600 if (! reload_in_progress && ! reload_completed)
1602 /* Copy the source to a register if neither operand is a register. */
1603 if (! register_operand (operands[0], mode)
1604 && ! register_operand (operands[1], mode))
1605 operands[1] = copy_to_mode_reg (mode, operands[1]);
1607 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1609 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1610 except that we can't use that function because it is static. */
1611 rtx new_rtx = change_address (operands[0], mode, 0);
1612 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1613 operands[0] = new_rtx;
1616 /* This case can happen while generating code to move the result
1617 of a library call to the target. Reject `st r0,@(rX,rY)' because
1618 reload will fail to find a spill register for rX, since r0 is already
1619 being used for the source. */
1620 else if (refers_to_regno_p (R0_REG, operands[1])
1621 && MEM_P (operands[0])
1622 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1623 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1624 operands[1] = copy_to_mode_reg (mode, operands[1]);
1626 /* When displacement addressing is used, RA will assign r0 to
1627 the pseudo register operand for the QI/HImode load/store.
1628 This tends to make a long live range for R0 and might cause
1629 anomalous register spills in some cases with LRA. See PR
1630 target/55212.
1631 We split such a load/store into two move insns via r0 so as to
1632 shorten the R0 live range. It will make some code worse but will
1633 win on average for LRA.
1634 Also, when base+index addressing is used and the index term is
1635 a subreg, LRA assumes that more hard registers can be available
1636 in some situations. That isn't the case for SH in the problematic
1637 case. We can pre-allocate R0 for that index term to avoid
1638 the issue. See PR target/66591. */
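/* Added sketch of the resulting split for a load (register numbers are made
   up): a QImode load from @(4,r4) into a pseudo is emitted below as

     mov.b  @(4,r4),r0
     mov    r0,<pseudo>

   which keeps the R0 requirement confined to one short live range.  */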
1639 else if (sh_lra_p ()
1640 && ! TARGET_SH2A
1641 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1642 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1644 bool load_p = REG_P (operands[0]);
1645 rtx reg = operands[load_p ? 0 : 1];
1646 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1648 if ((mode == QImode || mode == HImode)
1649 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1650 && GET_CODE (adr) == PLUS
1651 && REG_P (XEXP (adr, 0))
1652 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1653 && CONST_INT_P (XEXP (adr, 1))
1654 && INTVAL (XEXP (adr, 1)) != 0
1655 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1657 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1658 emit_move_insn (r0_rtx, operands[1]);
1659 operands[1] = r0_rtx;
1661 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1662 && GET_CODE (adr) == PLUS
1663 && REG_P (XEXP (adr, 0))
1664 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1665 && SUBREG_P (XEXP (adr, 1))
1666 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1668 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1669 emit_move_insn (r0_rtx, XEXP (adr, 1));
1670 XEXP (adr, 1) = r0_rtx;
1675 if (mode == Pmode || mode == ptr_mode)
1677 rtx op0 = operands[0];
1678 rtx op1 = operands[1];
1679 rtx opc;
1680 if (GET_CODE (op1) == CONST
1681 && GET_CODE (XEXP (op1, 0)) == PLUS
1682 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1683 != TLS_MODEL_NONE))
1685 opc = XEXP (XEXP (op1, 0), 1);
1686 op1 = XEXP (XEXP (op1, 0), 0);
1688 else
1689 opc = NULL_RTX;
1691 enum tls_model tls_kind;
1693 if (! reload_in_progress && ! reload_completed
1694 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1696 rtx tga_op1, tga_ret, tmp, tmp2;
1698 if (! flag_pic
1699 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1700 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1701 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1703 static int got_labelno;
1704 /* Don't schedule insns for getting GOT address when
1705 the first scheduling pass is enabled, to avoid spill
1706 failures for R0. */
1707 if (flag_schedule_insns)
1708 emit_insn (gen_blockage ());
1709 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1710 emit_use (gen_rtx_REG (SImode, PIC_REG));
1711 if (flag_schedule_insns)
1712 emit_insn (gen_blockage ());
1715 switch (tls_kind)
1717 case TLS_MODEL_GLOBAL_DYNAMIC:
1718 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1719 if (TARGET_FDPIC)
1720 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1721 sh_get_fdpic_reg_initial_val ());
1722 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1723 tmp = gen_reg_rtx (Pmode);
1724 emit_move_insn (tmp, tga_ret);
1725 op1 = tmp;
1726 break;
1728 case TLS_MODEL_LOCAL_DYNAMIC:
1729 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1730 if (TARGET_FDPIC)
1731 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1732 sh_get_fdpic_reg_initial_val ());
1733 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1735 tmp = gen_reg_rtx (Pmode);
1736 emit_move_insn (tmp, tga_ret);
1738 if (register_operand (op0, Pmode))
1739 tmp2 = op0;
1740 else
1741 tmp2 = gen_reg_rtx (Pmode);
1743 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1744 op1 = tmp2;
1745 break;
1747 case TLS_MODEL_INITIAL_EXEC:
1748 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1749 tmp = gen_sym2GOTTPOFF (op1);
1750 if (TARGET_FDPIC)
1751 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1752 sh_get_fdpic_reg_initial_val ());
1753 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1754 op1 = tga_op1;
1755 break;
1757 case TLS_MODEL_LOCAL_EXEC:
1758 tmp2 = gen_reg_rtx (Pmode);
1759 emit_insn (gen_store_gbr (tmp2));
1760 tmp = gen_reg_rtx (Pmode);
1761 emit_insn (gen_symTPOFF2reg (tmp, op1));
1763 if (register_operand (op0, Pmode))
1764 op1 = op0;
1765 else
1766 op1 = gen_reg_rtx (Pmode);
1768 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1769 break;
1771 default:
1772 gcc_unreachable ();
1774 if (opc)
1775 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1776 operands[1] = op1;
1780 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1782 rtx base, offset;
1783 split_const (operands[1], &base, &offset);
1785 if (GET_CODE (base) == SYMBOL_REF
1786 && !offset_within_block_p (base, INTVAL (offset)))
1788 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1789 emit_move_insn (tmp, base);
1790 if (!arith_operand (offset, mode))
1791 offset = force_reg (mode, offset);
1792 emit_insn (gen_add3_insn (operands[0], tmp, offset));
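/* Illustrative aside (not part of the original file): the LRA/R0 workaround
   earlier in prepare_move_operands splits a QImode/HImode displacement
   access into two moves so that R0 is live only briefly.  For an HImode
   store of a pseudo P to @(6,r4) the emitted code is roughly

       mov    <P>,r0        ! short-lived copy into r0
       mov.w  r0,@(6,r4)    ! the displacement form requires r0 as source

   and a displacement load likewise goes through r0 before being copied
   into the pseudo.  Register choices here are only for illustration.  */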
1797 /* Implement the canonicalize_comparison target hook for the combine
1798 pass. For the target hook this function is invoked via
1799 sh_canonicalize_comparison. This function is also re-used to
1800 canonicalize comparisons in cbranch pattern expanders. */
1801 static void
1802 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1803 machine_mode mode,
1804 bool op0_preserve_value)
1806 /* When invoked from within the combine pass the mode is not specified,
1807 so try to get it from one of the operands. */
1808 if (mode == VOIDmode)
1809 mode = GET_MODE (op0);
1810 if (mode == VOIDmode)
1811 mode = GET_MODE (op1);
1813 // We need to have a mode to do something useful here.
1814 if (mode == VOIDmode)
1815 return;
1817 // Currently, we don't deal with floats here.
1818 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1819 return;
1821 // Make sure that the constant operand is the second operand.
1822 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1824 if (op0_preserve_value)
1825 return;
1827 std::swap (op0, op1);
1828 cmp = swap_condition (cmp);
1831 if (CONST_INT_P (op1))
1833 /* Try to adjust the constant operand in such a way that available
1834 comparison insns can be utilized better and the constant can be
1835 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1836 constant pool. */
1837 const HOST_WIDE_INT val = INTVAL (op1);
1839 /* x > -1 --> x >= 0
1840 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1841 x <= -1 --> x < 0
1842 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1843 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1845 cmp = cmp == GT ? GE : LT;
1846 op1 = gen_int_mode (val + 1, mode);
1849 /* x >= 1 --> x > 0
1850 x >= 0x80 --> x > 0x7F
1851 x < 1 --> x <= 0
1852 x < 0x80 --> x <= 0x7F */
1853 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1855 cmp = cmp == GE ? GT : LE;
1856 op1 = gen_int_mode (val - 1, mode);
1859 /* unsigned x >= 1 --> x != 0
1860 unsigned x < 1 --> x == 0 */
1861 else if (val == 1 && (cmp == GEU || cmp == LTU))
1863 cmp = cmp == GEU ? NE : EQ;
1864 op1 = CONST0_RTX (mode);
1867 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1868 unsigned x < 0x80 --> unsigned x < 0x7F */
1869 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1871 cmp = cmp == GEU ? GTU : LEU;
1872 op1 = gen_int_mode (val - 1, mode);
1875 /* unsigned x > 0 --> x != 0
1876 unsigned x <= 0 --> x == 0 */
1877 else if (val == 0 && (cmp == GTU || cmp == LEU))
1878 cmp = cmp == GTU ? NE : EQ;
1880 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1881 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1882 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1883 && val == 0x7FFFFFFF)
1885 cmp = cmp == GTU ? LT : GE;
1886 op1 = const0_rtx;
1889 /* unsigned x >= 0x80000000 --> signed x < 0
1890 unsigned x < 0x80000000 --> signed x >= 0 */
1891 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1892 && (unsigned HOST_WIDE_INT)val
1893 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1895 cmp = cmp == GEU ? LT : GE;
1896 op1 = const0_rtx;
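/* Illustrative aside (standalone sketch, not compiler code): the constant
   adjustments above rely on ordinary two's complement identities such as

       x > -1                      <=>  x >= 0
       x >= 1                      <=>  x > 0
       (unsigned) x >= 1           <=>  x != 0
       (unsigned) x > 0x7FFFFFFF   <=>  (int) x < 0

   A minimal self-check of these identities, assuming the <stdint.h> fixed
   width types and a hypothetical helper name, kept under #if 0 because it
   is illustration only:  */
#if 0
static bool
sketch_canonicalization_identities_hold (int32_t x)
{
  uint32_t ux = (uint32_t) x;
  return (x > -1) == (x >= 0)
	 && (x >= 1) == (x > 0)
	 && (ux >= 1u) == (x != 0)
	 && (ux > 0x7FFFFFFFu) == (x < 0);
}
#endif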
1901 /* This function implements the canonicalize_comparison target hook.
1902 This wrapper around the internally used sh_canonicalize_comparison
1903 function is needed to do the enum rtx_code <-> int conversion.
1904 Target hooks cannot use enum rtx_code in their definitions. */
1905 static void
1906 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1907 bool op0_preserve_value)
1909 enum rtx_code tmp_code = (enum rtx_code)*code;
1910 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1911 VOIDmode, op0_preserve_value);
1912 *code = (int)tmp_code;
1915 /* This function implements the legitimate_combined_insn target hook,
1916 which the combine pass uses to early reject combined insns, before
1917 it tries to recog the insn and determine its cost. */
1918 static bool
1919 sh_legitimate_combined_insn (rtx_insn* insn)
1921 /* Reject combinations of memory loads and zero extensions, as these
1922 interfere with other combine patterns such as zero extracts and bit
1923 tests. The SH2A movu.{b|w} insns are formed later in the
1924 'sh_optimize_extu_exts' pass after combine/split1. */
1925 rtx p = PATTERN (insn);
1926 if (GET_CODE (p) == SET
1927 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1928 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1929 && MEM_P (XEXP (XEXP (p, 1), 0)))
1930 return false;
1932 return true;
1935 bool
1936 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1938 *p1 = T_REG;
1939 *p2 = INVALID_REGNUM;
1940 return true;
1943 /* Try to calculate the branch distance of a conditional branch in bytes.
1945 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1946 walk from this insn into the next (fall-through) basic block and see if
1947 we hit the label. */
1948 unsigned int
1949 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1951 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1953 if (dump_file)
1955 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1956 print_rtl_single (dump_file, cbranch_insn);
1959 unsigned int dist = 0;
1961 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1962 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1964 const unsigned int i_len = get_attr_length (i);
1965 dist += i_len;
1967 if (dump_file)
1968 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1969 INSN_UID (i), i_len, dist);
1971 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1973 if (l == cbranch_insn->jump_target ())
1975 if (dump_file)
1976 fprintf (dump_file, " cbranch dist = %u\n", dist);
1977 return dist;
1979 break;
1983 if (dump_file)
1984 fprintf (dump_file, " cbranch dist = unknown\n");
1986 return unknown_cbranch_distance;
1989 enum rtx_code
1990 prepare_cbranch_operands (rtx *operands, machine_mode mode,
1991 enum rtx_code comparison)
1993 gcc_assert (can_create_pseudo_p ());
1995 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1996 comparison = GET_CODE (operands[0]);
1998 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1999 mode, false);
2001 rtx op1 = operands[1];
2002 operands[1] = force_reg (mode, op1);
2004 /* When we are handling DImode comparisons, we want to keep constants so
2005 that we can optimize the component comparisons; however, memory loads
2006 are better issued as a whole so that they can be scheduled well.
2007 SImode equality comparisons allow I08 constants, but only when they
2008 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2009 into a register, that register might as well be r0, and we allow the
2010 constant. If it is already in a register, this is likely to be
2011 allocated to a different hard register, thus we load the constant into
2012 a register unless it is zero. */
2013 if (!REG_P (operands[2])
2014 && (!CONST_INT_P (operands[2])
2015 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2016 && ((comparison != EQ && comparison != NE)
2017 || (REG_P (op1) && REGNO (op1) != R0_REG)
2018 || !satisfies_constraint_I08 (operands[2])))))
2019 operands[2] = force_reg (mode, operands[2]);
2021 return comparison;
2024 static void
2025 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2026 profile_probability probability)
2028 rtx (*branch_expander) (rtx) = gen_branch_true;
2029 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2030 switch (comparison)
2032 case NE: case LT: case LE: case LTU: case LEU:
2033 comparison = reverse_condition (comparison);
2034 branch_expander = gen_branch_false;
2035 default: ;
2037 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2038 gen_rtx_fmt_ee (comparison, SImode,
2039 operands[1], operands[2])));
2040 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2041 if (probability.initialized_p ())
2042 add_reg_br_prob_note (jump, probability);
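/* Illustrative aside (sketch): SH can only set the T bit for EQ/GT/GE/GTU/
   GEU style comparisons, which is why the cases above reverse NE/LT/LE/LTU/
   LEU and branch on a false T bit.  E.g. "if (a < b) goto L" comes out
   roughly as (register choices are only for illustration)

       cmp/ge  r5,r4      ! T = (r4 >= r5), the reversed condition
       bf      .L         ! branch taken when T is clear, i.e. when a < b
*/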
2045 void
2046 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2048 expand_cbranchsi4 (operands, comparison,
2049 profile_probability::uninitialized ());
2052 /* ??? How should we distribute probabilities when more than one branch
2053 is generated? So far we only have some ad-hoc observations:
2054 - If the operands are random, they are likely to differ in both parts.
2055 - If comparing items in a hash chain, the operands are random or equal;
2056 operation should be EQ or NE.
2057 - If items are searched in an ordered tree from the root, we can expect
2058 the highpart to be unequal about half of the time; operation should be
2059 an inequality comparison, operands non-constant, and overall probability
2060 about 50%. Likewise for quicksort.
2061 - Range checks will often be made against constants. Even if we assume for
2062 simplicity an even distribution of the non-constant operand over a
2063 sub-range here, the same probability could be generated with differently
2064 wide sub-ranges - as long as the ratio of the part of the subrange that
2065 is before the threshold to the part that comes after the threshold stays
2066 the same. Thus, we can't really tell anything here;
2067 assuming random distribution is at least simple.
2069 bool
2070 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2072 enum rtx_code msw_taken, msw_skip, lsw_taken;
2073 rtx_code_label *skip_label = NULL;
2074 rtx op1h, op1l, op2h, op2l;
2075 int num_branches;
2076 profile_probability prob, rev_prob;
2077 profile_probability msw_taken_prob = profile_probability::uninitialized (),
2078 msw_skip_prob = profile_probability::uninitialized (),
2079 lsw_taken_prob = profile_probability::uninitialized ();
2081 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2082 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2083 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2084 op1l = gen_lowpart (SImode, operands[1]);
2085 op2l = gen_lowpart (SImode, operands[2]);
2086 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2087 prob = split_branch_probability;
2088 rev_prob = prob.invert ();
2089 switch (comparison)
2091 case EQ:
2092 msw_skip = NE;
2093 lsw_taken = EQ;
2094 if (prob.initialized_p ())
2096 /* FIXME: This is not optimal. We do not really know the probability
2097 that values differ by MSW only, but we should probably distribute
2098 probabilities more evenly. */
2099 msw_skip_prob = rev_prob;
2100 lsw_taken_prob = prob > profile_probability::never ()
2101 ? profile_probability::guessed_always ()
2102 : profile_probability::guessed_never ();
2104 break;
2105 case NE:
2106 msw_taken = NE;
2107 msw_taken_prob = prob;
2108 lsw_taken = NE;
2109 lsw_taken_prob = profile_probability::guessed_never ();
2110 break;
2111 case GTU: case GT:
2112 msw_taken = comparison;
2113 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2114 break;
2115 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2116 msw_skip = swap_condition (msw_taken);
2117 lsw_taken = GTU;
2118 break;
2119 case GEU: case GE:
2120 if (op2l == CONST0_RTX (SImode))
2121 msw_taken = comparison;
2122 else
2124 msw_taken = comparison == GE ? GT : GTU;
2125 msw_skip = swap_condition (msw_taken);
2126 lsw_taken = GEU;
2128 break;
2129 case LTU: case LT:
2130 msw_taken = comparison;
2131 if (op2l == CONST0_RTX (SImode))
2132 break;
2133 msw_skip = swap_condition (msw_taken);
2134 lsw_taken = LTU;
2135 break;
2136 case LEU: case LE:
2137 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2138 msw_taken = comparison;
2139 else
2141 lsw_taken = LEU;
2142 if (comparison == LE)
2143 msw_taken = LT;
2144 else if (op2h != CONST0_RTX (SImode))
2145 msw_taken = LTU;
2146 else
2148 msw_skip = swap_condition (LTU);
2149 break;
2151 msw_skip = swap_condition (msw_taken);
2153 break;
2154 default: return false;
2156 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2157 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2158 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2159 if (comparison != EQ && comparison != NE && num_branches > 1)
2161 if (!CONSTANT_P (operands[2])
2162 && prob.initialized_p ()
2163 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2164 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2166 msw_taken_prob = prob.apply_scale (1, 2);
2167 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
2168 rev_prob.to_reg_br_prob_base ()
2169 + REG_BR_PROB_BASE);
2170 lsw_taken_prob = prob;
2172 else
2174 msw_taken_prob = prob;
2175 msw_skip_prob = profile_probability::guessed_always ();
2176 /* ??? If we have a constant op2h, should we use that when
2177 calculating lsw_taken_prob? */
2178 lsw_taken_prob = prob;
2181 operands[1] = op1h;
2182 operands[2] = op2h;
2184 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2185 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2186 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2188 rtx taken_label = operands[3];
2190 /* Operands were possibly modified, but msw_skip doesn't expect this.
2191 Always use the original ones. */
2192 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2194 operands[1] = op1h;
2195 operands[2] = op2h;
2198 operands[3] = skip_label = gen_label_rtx ();
2199 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2200 operands[3] = taken_label;
2202 operands[1] = op1l;
2203 operands[2] = op2l;
2204 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2205 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2206 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2207 emit_label (skip_label);
2208 return true;
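/* Illustrative aside (standalone sketch with a hypothetical name): the
   expander above decomposes a DImode comparison into SImode word
   comparisons.  For a signed 64-bit "a < b" the most significant words are
   compared signed and, only if they are equal, the least significant words
   decide unsigned:  */
#if 0
static bool
sketch_di_less_than (int32_t a_hi, uint32_t a_lo, int32_t b_hi, uint32_t b_lo)
{
  if (a_hi != b_hi)
    return a_hi < b_hi;	/* the msw_taken / msw_skip branches */
  return a_lo < b_lo;	/* the lsw_taken branch (unsigned)   */
}
#endif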
2211 /* Given an operand, return 1 if the evaluated operand plugged into an
2212 if_then_else will result in a branch_true, 0 if branch_false, or
2213 -1 if neither applies. The truth table goes like this:
2215 op | cmpval | code | result
2216 ---------+--------+---------+--------------------
2217 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2218 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2219 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2220 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2221 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2222 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2223 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2224 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2226 sh_eval_treg_value (rtx op)
2228 if (t_reg_operand (op, GET_MODE (op)))
2229 return 1;
2230 if (negt_reg_operand (op, GET_MODE (op)))
2231 return 0;
2233 rtx_code code = GET_CODE (op);
2234 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2235 return -1;
2237 int cmpop = code == EQ ? 1 : 0;
2238 int cmpval = INTVAL (XEXP (op, 1));
2239 if (cmpval != 0 && cmpval != 1)
2240 return -1;
2242 int t;
2243 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2244 t = 0;
2245 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2246 t = 1;
2247 else
2248 return -1;
2250 return t ^ (cmpval == cmpop);
2253 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2254 of floating-point comparisons. */
2255 static void
2256 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2258 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2259 && GET_CODE (insn) != PARALLEL)
2261 insn = gen_rtx_PARALLEL (VOIDmode,
2262 gen_rtvec (3, insn,
2263 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2264 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2266 emit_insn (insn);
2269 /* Prepare the operands for an scc instruction; make sure that the
2270 compare has been done and the result is in T_REG. */
2271 void
2272 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2274 rtx t_reg = get_t_reg_rtx ();
2275 enum rtx_code oldcode = code;
2277 /* First need a compare insn. */
2278 switch (code)
2280 case NE:
2281 /* It isn't possible to handle this case. */
2282 gcc_unreachable ();
2283 case LT:
2284 code = GT;
2285 break;
2286 case LE:
2287 code = GE;
2288 break;
2289 case LTU:
2290 code = GTU;
2291 break;
2292 case LEU:
2293 code = GEU;
2294 break;
2295 default:
2296 break;
2298 if (code != oldcode)
2299 std::swap (op0, op1);
2301 machine_mode mode = GET_MODE (op0);
2302 if (mode == VOIDmode)
2303 mode = GET_MODE (op1);
2305 op0 = force_reg (mode, op0);
2306 if ((code != EQ && code != NE
2307 && (op1 != const0_rtx
2308 || code == GTU || code == GEU || code == LTU || code == LEU))
2309 || (mode == DImode && op1 != const0_rtx)
2310 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2311 op1 = force_reg (mode, op1);
2313 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2314 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2315 mode);
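/* Illustrative aside (sketch): because there are no cmp/lt or cmp/le insns,
   the operand swap above computes e.g. "T = (a < b)" as "T = (b > a)",
   roughly (register choices are only for illustration)

       cmp/gt  r4,r5      ! T = (r5 > r4) = (a < b)

   after which the caller can copy T into a general register with movt if
   the scc value itself is needed.  */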
2318 /* Called from the md file, set up the operands of a compare instruction. */
2319 void
2320 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2322 enum rtx_code code = GET_CODE (operands[0]);
2323 enum rtx_code branch_code;
2324 rtx op0 = operands[1];
2325 rtx op1 = operands[2];
2326 rtx insn;
2327 bool need_ccmpeq = false;
2329 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2331 op0 = force_reg (mode, op0);
2332 op1 = force_reg (mode, op1);
2334 else
2336 if (code != EQ || mode == DImode)
2338 /* Force args into regs, since we can't use constants here. */
2339 op0 = force_reg (mode, op0);
2340 if (op1 != const0_rtx || code == GTU || code == GEU)
2341 op1 = force_reg (mode, op1);
2345 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2347 if (code == LT
2348 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2349 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2351 std::swap (op0, op1);
2352 code = swap_condition (code);
2355 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2356 if (code == GE)
2358 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2359 need_ccmpeq = true;
2360 code = GT;
2363 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2364 to EQ/GT respectively. */
2365 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2368 switch (code)
2370 case EQ:
2371 case GT:
2372 case GE:
2373 case GTU:
2374 case GEU:
2375 branch_code = code;
2376 break;
2377 case NE:
2378 case LT:
2379 case LE:
2380 case LTU:
2381 case LEU:
2382 branch_code = reverse_condition (code);
2383 break;
2384 default:
2385 gcc_unreachable ();
2388 insn = gen_rtx_SET (get_t_reg_rtx (),
2389 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2391 sh_emit_set_t_insn (insn, mode);
2392 if (need_ccmpeq)
2393 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2395 if (branch_code == code)
2396 emit_jump_insn (gen_branch_true (operands[3]));
2397 else
2398 emit_jump_insn (gen_branch_false (operands[3]));
2401 void
2402 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2404 enum rtx_code code = GET_CODE (operands[1]);
2405 rtx op0 = operands[2];
2406 rtx op1 = operands[3];
2407 rtx_code_label *lab = NULL;
2408 bool invert = false;
2410 op0 = force_reg (mode, op0);
2411 if ((code != EQ && code != NE
2412 && (op1 != const0_rtx
2413 || code == GTU || code == GEU || code == LTU || code == LEU))
2414 || (mode == DImode && op1 != const0_rtx)
2415 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2416 op1 = force_reg (mode, op1);
2418 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2420 if (code == LT || code == LE)
2422 std::swap (op0, op1);
2423 code = swap_condition (code);
2425 if (code == GE)
2427 if (TARGET_IEEE)
2429 lab = gen_label_rtx ();
2430 sh_emit_scc_to_t (EQ, op0, op1);
2431 emit_jump_insn (gen_branch_true (lab));
2432 code = GT;
2434 else
2436 code = LT;
2437 invert = true;
2442 if (code == NE)
2444 code = EQ;
2445 invert = true;
2448 sh_emit_scc_to_t (code, op0, op1);
2449 if (lab)
2450 emit_label (lab);
2451 if (invert)
2452 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2453 else
2454 emit_move_insn (operands[0], get_t_reg_rtx ());
2457 /* Functions to output assembly code. */
2459 /* Return a sequence of instructions to perform a DI or DF move.
2461 Since the SH cannot move a DI or DF in one instruction, we have
2462 to take care when we see overlapping source and dest registers. */
2463 const char *
2464 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2465 machine_mode mode)
2467 rtx dst = operands[0];
2468 rtx src = operands[1];
2470 if (MEM_P (dst)
2471 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2472 return "mov.l %T1,%0" "\n"
2473 " mov.l %1,%0";
2475 if (register_operand (dst, mode)
2476 && register_operand (src, mode))
2478 if (REGNO (src) == MACH_REG)
2479 return "sts mach,%S0" "\n"
2480 " sts macl,%R0";
2482 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2483 when mov.d r1,r0 do r1->r0 then r2->r1. */
2484 if (REGNO (src) + 1 == REGNO (dst))
2485 return "mov %T1,%T0" "\n"
2486 " mov %1,%0";
2487 else
2488 return "mov %1,%0" "\n"
2489 " mov %T1,%T0";
2491 else if (CONST_INT_P (src))
2493 if (INTVAL (src) < 0)
2494 output_asm_insn ("mov #-1,%S0", operands);
2495 else
2496 output_asm_insn ("mov #0,%S0", operands);
2498 return "mov %1,%R0";
2500 else if (MEM_P (src))
2502 int ptrreg = -1;
2503 int dreg = REGNO (dst);
2504 rtx inside = XEXP (src, 0);
2506 switch (GET_CODE (inside))
2508 case REG:
2509 ptrreg = REGNO (inside);
2510 break;
2512 case SUBREG:
2513 ptrreg = subreg_regno (inside);
2514 break;
2516 case PLUS:
2517 ptrreg = REGNO (XEXP (inside, 0));
2518 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2519 an offsettable address. Unfortunately, offsettable addresses use
2520 QImode to check the offset, and a QImode offsettable address
2521 requires r0 for the other operand, which is not currently
2522 supported, so we can't use the 'o' constraint.
2523 Thus we must check for and handle r0+REG addresses here.
2524 We punt for now, since this is likely very rare. */
2525 gcc_assert (!REG_P (XEXP (inside, 1)));
2526 break;
2528 case LABEL_REF:
2529 return "mov.l %1,%0" "\n"
2530 " mov.l %1+4,%T0";
2531 case POST_INC:
2532 return "mov.l %1,%0" "\n"
2533 " mov.l %1,%T0";
2534 default:
2535 gcc_unreachable ();
2538 /* Work out the safe way to copy. Copy into the second half first. */
2539 if (dreg == ptrreg)
2540 return "mov.l %T1,%T0" "\n"
2541 " mov.l %1,%0";
2544 return "mov.l %1,%0" "\n"
2545 " mov.l %T1,%T0";
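/* Illustrative aside (standalone sketch, hypothetical name): the
   register-register case above orders the two word moves so that the half
   which is still needed is never overwritten first.  Conceptually:  */
#if 0
static void
sketch_copy_double_word (uint32_t regs[], int dst, int src)
{
  if (src + 1 == dst)
    {
      /* The low word of the destination pair is the high word of the
	 source pair: copy the high half first (mov %T1,%T0 / mov %1,%0).  */
      regs[dst + 1] = regs[src + 1];
      regs[dst] = regs[src];
    }
  else
    {
      regs[dst] = regs[src];
      regs[dst + 1] = regs[src + 1];
    }
}
#endif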
2548 /* Print an instruction which would have gone into a delay slot after
2549 another instruction, but couldn't because the other instruction expanded
2550 into a sequence where putting the slot insn at the end wouldn't work. */
2551 static void
2552 print_slot (rtx_sequence *seq)
2554 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2556 seq->insn (1)->set_deleted ();
2559 const char *
2560 output_far_jump (rtx_insn *insn, rtx op)
2562 struct { rtx lab, reg, op; } this_jmp;
2563 rtx_code_label *braf_base_lab = NULL;
2564 const char *jump;
2565 int far;
2566 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2567 rtx_insn *prev;
2569 this_jmp.lab = gen_label_rtx ();
2571 if (TARGET_SH2
2572 && offset >= -32764
2573 && offset - get_attr_length (insn) <= 32766
2574 && ! CROSSING_JUMP_P (insn))
2576 far = 0;
2577 jump = "mov.w %O0,%1" "\n"
2578 " braf %1";
2580 else
2582 far = 1;
2583 if (flag_pic)
2585 if (TARGET_SH2)
2586 jump = "mov.l %O0,%1" "\n"
2587 " braf %1";
2588 else
2589 jump = "mov.l r0,@-r15" "\n"
2590 " mova %O0,r0" "\n"
2591 " mov.l @r0,%1" "\n"
2592 " add r0,%1" "\n"
2593 " mov.l @r15+,r0" "\n"
2594 " jmp @%1";
2596 else
2597 jump = "mov.l %O0,%1" "\n"
2598 " jmp @%1";
2600 /* If we have a scratch register available, use it. */
2601 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2602 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2604 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2605 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2606 jump = "mov.l r1,@-r15" "\n"
2607 " mova %O0,r0" "\n"
2608 " mov.l @r0,r1" "\n"
2609 " add r1,r0" "\n"
2610 " mov.l @r15+,r1" "\n"
2611 " jmp @%1";
2612 output_asm_insn (jump, &this_jmp.lab);
2613 if (dbr_sequence_length ())
2614 print_slot (final_sequence);
2615 else
2616 output_asm_insn ("nop", 0);
2618 else
2620 /* Output the delay slot insn first if any. */
2621 if (dbr_sequence_length ())
2622 print_slot (final_sequence);
2624 this_jmp.reg = gen_rtx_REG (SImode, 13);
2625 output_asm_insn ("mov.l r13,@-r15", 0);
2626 output_asm_insn (jump, &this_jmp.lab);
2627 output_asm_insn ("mov.l @r15+,r13", 0);
2629 if (far && flag_pic && TARGET_SH2)
2631 braf_base_lab = gen_label_rtx ();
2632 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2633 CODE_LABEL_NUMBER (braf_base_lab));
2635 if (far)
2636 output_asm_insn (".align 2", 0);
2637 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2638 this_jmp.op = op;
2639 if (far && flag_pic)
2641 if (TARGET_SH2)
2642 this_jmp.lab = braf_base_lab;
2643 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2645 else
2646 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2647 return "";
2650 /* Local label counter, used for constants in the pool and inside
2651 pattern branches. */
2652 static int lf = 100;
2654 /* Output code for ordinary branches. */
2655 const char *
2656 output_branch (int logic, rtx_insn *insn, rtx *operands)
2658 switch (get_attr_length (insn))
2660 case 6:
2661 /* This can happen if filling the delay slot has caused a forward
2662 branch to exceed its range (we could reverse it, but only
2663 when we know we won't overextend other branches; this should
2664 best be handled by relaxation).
2665 It can also happen when other condbranches hoist a delay slot insn
2666 from their destination, thus leading to a code size increase.
2667 But the branch will still be in the range -4092..+4098 bytes. */
2668 if (! TARGET_RELAX)
2670 int label = lf++;
2671 /* The call to print_slot will clobber the operands. */
2672 rtx op0 = operands[0];
2674 /* If the instruction in the delay slot is annulled (true), then
2675 there is no delay slot where we can put it now. The only safe
2676 place for it is after the label. final will do that by default. */
2678 if (final_sequence
2679 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2680 && get_attr_length (final_sequence->insn (1)))
2682 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2683 ASSEMBLER_DIALECT ? "/" : ".", label);
2684 print_slot (final_sequence);
2686 else
2687 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2689 output_asm_insn ("bra\t%l0", &op0);
2690 fprintf (asm_out_file, "\tnop\n");
2691 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2693 return "";
2695 /* FALLTHRU */
2696 /* When relaxing, handle this like a short branch. The linker
2697 will fix it up if it still doesn't fit after relaxation. */
2698 case 2:
2699 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2701 /* These are for SH2e, in which we have to account for the
2702 extra nop because of the hardware bug in annulled branches. */
2703 case 8:
2704 if (! TARGET_RELAX)
2706 int label = lf++;
2708 gcc_assert (!final_sequence
2709 || !(INSN_ANNULLED_BRANCH_P
2710 (XVECEXP (final_sequence, 0, 0))));
2711 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2712 logic ? "f" : "t",
2713 ASSEMBLER_DIALECT ? "/" : ".", label);
2714 fprintf (asm_out_file, "\tnop\n");
2715 output_asm_insn ("bra\t%l0", operands);
2716 fprintf (asm_out_file, "\tnop\n");
2717 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2719 return "";
2721 /* FALLTHRU */
2722 case 4:
2724 char buffer[10];
2726 sprintf (buffer, "b%s%ss\t%%l0",
2727 logic ? "t" : "f",
2728 ASSEMBLER_DIALECT ? "/" : ".");
2729 output_asm_insn (buffer, &operands[0]);
2730 return "nop";
2733 default:
2734 /* There should be no longer branches now - that would
2735 indicate that something has destroyed the branches set
2736 up in machine_dependent_reorg. */
2737 gcc_unreachable ();
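/* Illustrative aside (sketch): in the length 6 case above, a conditional
   branch whose target is out of bt/bf range is emitted as an inverted
   short branch around an unconditional one, roughly

       bf      .LF100      ! inverted condition, skips the far branch
       bra     target
       nop                 ! delay slot
   .LF100:

   where .LF100 stands for the local label generated from 'lf'.  */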
2741 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2742 fill in operands[9] as a label to the successor insn.
2743 We try to use jump threading where possible.
2744 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2745 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2746 follow jmp and bt, if the address is in range. */
2747 const char *
2748 output_branchy_insn (enum rtx_code code, const char *templ,
2749 rtx_insn *insn, rtx *operands)
2751 rtx_insn *next_insn = NEXT_INSN (insn);
2753 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2755 rtx src = SET_SRC (PATTERN (next_insn));
2756 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2758 /* Following branch not taken */
2759 rtx_code_label *lab = gen_label_rtx ();
2760 emit_label_after (lab, next_insn);
2761 INSN_ADDRESSES_NEW (lab,
2762 INSN_ADDRESSES (INSN_UID (next_insn))
2763 + get_attr_length (next_insn));
2764 operands[9] = lab;
2765 return templ;
2767 else
2769 int offset = (branch_dest (next_insn)
2770 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2771 if (offset >= -252 && offset <= 258)
2773 if (GET_CODE (src) == IF_THEN_ELSE)
2774 /* branch_true */
2775 src = XEXP (src, 1);
2776 operands[9] = src;
2777 return templ;
2781 rtx_code_label *lab = gen_label_rtx ();
2782 emit_label_after (lab, insn);
2783 INSN_ADDRESSES_NEW (lab,
2784 INSN_ADDRESSES (INSN_UID (insn))
2785 + get_attr_length (insn));
2786 operands[9] = lab;
2787 return templ;
2790 const char *
2791 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2793 return output_branchy_insn (NE, "bt %l9" "\n"
2794 " fcmp/eq %1,%0",
2795 insn, operands);
2798 /* Output the start of the assembler file. */
2799 static void
2800 sh_file_start (void)
2802 default_file_start ();
2804 if (TARGET_ELF)
2805 /* We need to show the text section with the proper
2806 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2807 emits it without those attributes, or else GAS
2808 will complain. We can teach GAS specifically about the
2809 default attributes for our choice of text section, but
2810 then we would have to change GAS again if/when we change
2811 the text section name. */
2812 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2813 else
2814 /* Switch to the data section so that the coffsem symbol
2815 isn't in the text section. */
2816 switch_to_section (data_section);
2818 if (TARGET_LITTLE_ENDIAN)
2819 fputs ("\t.little\n", asm_out_file);
2822 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2823 need to be output as pointers to function descriptors for
2824 FDPIC. */
2826 static bool
2827 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2829 if (TARGET_FDPIC && size == UNITS_PER_WORD
2830 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2832 fputs ("\t.long\t", asm_out_file);
2833 output_addr_const (asm_out_file, value);
2834 fputs ("@FUNCDESC\n", asm_out_file);
2835 return true;
2837 return default_assemble_integer (value, size, aligned_p);
2840 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2841 static bool
2842 unspec_caller_rtx_p (rtx pat)
2844 rtx base, offset;
2845 split_const (pat, &base, &offset);
2847 if (GET_CODE (base) == UNSPEC)
2849 if (XINT (base, 1) == UNSPEC_CALLER)
2850 return true;
2851 for (int i = 0; i < XVECLEN (base, 0); i++)
2852 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2853 return true;
2855 return false;
2858 /* Indicate that INSN cannot be duplicated. This is true for an insn
2859 that generates a unique label. */
2860 static bool
2861 sh_cannot_copy_insn_p (rtx_insn *insn)
2863 if (!reload_completed || !flag_pic)
2864 return false;
2866 if (!NONJUMP_INSN_P (insn))
2867 return false;
2868 if (asm_noperands (insn) >= 0)
2869 return false;
2871 rtx pat = PATTERN (insn);
2873 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2874 return false;
2876 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2878 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2879 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2880 return true;
2883 if (GET_CODE (pat) != SET)
2884 return false;
2885 pat = SET_SRC (pat);
2887 if (unspec_caller_rtx_p (pat))
2888 return true;
2890 return false;
2893 /* Number of instructions used to make an arithmetic right shift by N. */
2894 static const char ashiftrt_insns[] =
2895 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2897 /* Description of a logical left or right shift, when expanded to a sequence
2898 of 1/2/8/16 shifts.
2899 Notice that one-bit right shifts clobber the T bit. One-bit left shifts
2900 are done with an 'add Rn,Rn' insn and thus do not clobber the T bit. */
2901 enum
2903 ASHL_CLOBBERS_T = 1 << 0,
2904 LSHR_CLOBBERS_T = 1 << 1
2907 struct ashl_lshr_sequence
2909 char insn_count;
2910 signed char amount[6];
2911 char clobbers_t;
2914 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2916 { 0, { 0 }, 0 }, // 0
2917 { 1, { 1 }, LSHR_CLOBBERS_T },
2918 { 1, { 2 }, 0 },
2919 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2920 { 2, { 2, 2 }, 0 }, // 4
2921 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2922 { 3, { 2, 2, 2 }, 0 },
2923 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2924 { 1, { 8 }, 0 }, // 8
2925 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2926 { 2, { 8, 2 }, 0 },
2927 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2928 { 3, { 8, 2, 2 }, 0 }, // 12
2929 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2930 { 3, { 8, -2, 8 }, 0 },
2931 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2932 { 1, { 16 }, 0 }, // 16
2933 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2934 { 2, { 16, 2 }, 0 },
2935 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2936 { 3, { 16, 2, 2 }, 0 }, // 20
2937 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2938 { 3, { 16, -2, 8 }, 0 },
2939 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2940 { 2, { 16, 8 }, 0 }, // 24
2941 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2942 { 3, { 16, 8, 2 }, 0 },
2943 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2944 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2945 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2946 { 3, { 16, -2, 16 }, 0 },
2948 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2949 For a left shift by 31 a 2 insn and-rotl sequences can be used.
2950 However, the shift-and combiner code needs this entry here to be in
2951 terms of real shift insns. */
2952 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
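/* Illustrative aside (worked example): a constant shift is expanded from
   the 1/2/8/16 steps listed above.  Entry 13, { 8, 2, 1, 2 }, uses the
   identity

       x << 13  ==  (((x << 8) << 2) << 1) << 2

   and its LSHR_CLOBBERS_T flag records that the corresponding right shift
   sequence contains a one-bit shlr step, which clobbers the T bit.  */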
2955 /* Individual shift amounts for shift amounts < 16, where up to three of the
2956 highmost bits might be clobbered. This is typically used when combined with some
2957 kind of sign or zero extension. */
2958 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2960 { 0, { 0 }, 0 }, // 0
2961 { 1, { 1 }, LSHR_CLOBBERS_T },
2962 { 1, { 2 }, 0 },
2963 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2964 { 2, { 2, 2 }, 0 }, // 4
2965 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2966 { 2, { 8, -2 }, 0 },
2967 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2968 { 1, { 8 }, 0 }, // 8
2969 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2970 { 2, { 8, 2 }, 0 },
2971 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2972 { 3, { 8, 2, 2 }, 0 }, // 12
2973 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2974 { 2, { 16, -2 }, 0 },
2975 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2976 { 1, { 16 }, 0 }, // 16
2977 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2978 { 2, { 16, 2 }, 0 },
2979 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2980 { 3, { 16, 2, 2 }, 0 }, // 20
2981 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2982 { 3, { 16, -2, 8 }, 0 },
2983 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2984 { 2, { 16, 8 }, 0 }, // 24
2985 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2986 { 3, { 16, 8, 2 }, 0 },
2987 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2988 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2989 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2990 { 3, { 16, -2, 16 }, 0 },
2991 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2994 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
2995 will clobber the T bit. */
2996 bool
2997 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
2999 gcc_assert (CONST_INT_P (shift_amount));
3001 const int shift_amount_i = INTVAL (shift_amount) & 31;
3003 /* Special case for shift count of 31: use and-rotl sequence. */
3004 if (shift_amount_i == 31)
3005 return true;
3007 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3008 & ASHL_CLOBBERS_T) != 0;
3011 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3012 instructions will clobber the T bit. */
3013 bool
3014 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3016 gcc_assert (CONST_INT_P (shift_amount));
3018 /* For right shifts the constant might be negative. */
3019 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3021 /* Special case for shift count of 31: use shll-movt sequence. */
3022 if (shift_amount_i == 31)
3023 return true;
3025 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3026 & LSHR_CLOBBERS_T) != 0;
3029 /* Return true if it is potentially beneficial to use a dynamic shift
3030 instruction (shad / shar) instead of a combination of 1/2/8/16
3031 shift instructions for the specified shift count.
3032 If dynamic shifts are not available, always return false. */
3033 bool
3034 sh_dynamicalize_shift_p (rtx count)
3036 gcc_assert (CONST_INT_P (count));
3038 /* For right shifts the constant might be negative. */
3039 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3040 int insn_count;
3042 /* For left and right shifts, there are shorter 2 insn sequences for
3043 shift amounts of 31. */
3044 if (shift_amount_i == 31)
3045 insn_count = 2;
3046 else
3047 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3049 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3052 /* Assuming we have a value that has been sign-extended by at least one bit,
3053 can we use ext_ashl_lshr_seq with the last shift turned to an
3054 arithmetic shift to shift it by N without data loss, and quicker than by
3055 other means? */
3056 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3058 /* Return the cost of a shift. */
3059 static inline int
3060 shiftcosts (rtx x)
3062 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3064 if (GET_MODE (x) == DImode
3065 && CONST_INT_P (XEXP (x, 1))
3066 && INTVAL (XEXP (x, 1)) == 1)
3067 return 2;
3069 /* Everything else is invalid, because there is no pattern for it. */
3070 return -1;
3072 /* If shifting by a non-constant, then this will be expensive. */
3073 if (!CONST_INT_P (XEXP (x, 1)))
3074 return SH_DYNAMIC_SHIFT_COST;
3076 /* Otherwise, return the true cost in instructions. Cope with out of range
3077 shift counts more or less arbitrarily. */
3078 int value = INTVAL (XEXP (x, 1)) & 31;
3080 if (GET_CODE (x) == ASHIFTRT)
3082 int cost = ashiftrt_insns[value];
3083 /* If dynamic shifts are available and profitable in this case, then we
3084 put the constant in a reg and use shad. */
3085 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3086 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3087 return cost;
3089 else
3090 return ashl_lshr_seq[value].insn_count;
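/* Illustrative aside (worked example): an arithmetic right shift by 6 is
   listed as 8 insns in ashiftrt_insns, so on targets with dynamic shifts
   the cost above is capped at 1 + SH_DYNAMIC_SHIFT_COST, modelling a
   constant load plus a single shad.  */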
3093 /* Return the cost of an AND/XOR/IOR operation. */
3094 static inline int
3095 and_xor_ior_costs (rtx x, int code)
3097 /* On SH1-4 we have only max. SImode operations.
3098 Double the cost for modes > SImode. */
3099 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3101 /* A logical operation with two registers is a single cycle
3102 instruction. */
3103 if (!CONST_INT_P (XEXP (x, 1)))
3104 return 1 * cost_scale;
3106 int i = INTVAL (XEXP (x, 1));
3108 /* These constants are single cycle extu.[bw] instructions. */
3109 if ((i == 0xff || i == 0xffff) && code == AND)
3110 return 1 * cost_scale;
3111 /* Constants that can be used in an instruction as an immediate are
3112 a single cycle, but this requires r0, so make it a little more
3113 expensive. */
3114 if (CONST_OK_FOR_K08 (i))
3115 return 2 * cost_scale;
3116 /* Constants that can be loaded with a mov immediate need one more cycle.
3117 This case is probably unnecessary. */
3118 if (CONST_OK_FOR_I08 (i))
3119 return 2 * cost_scale;
3120 /* Any other constant requires an additional 2 cycle pc-relative load.
3121 This case is probably unnecessary. */
3122 return 3 * cost_scale;
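/* Illustrative aside (worked example): with the costs above, "and" of two
   registers costs 1; an AND with 0xff or 0xffff costs 1 because it is
   really an extu.b / extu.w; a K08 immediate such as 0x7f costs 2 since
   the immediate form needs r0; any other constant costs 3 because it has
   to be loaded from the constant pool first.  */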
3125 /* Return the cost of an addition or a subtraction. */
3126 static inline int
3127 addsubcosts (rtx x)
3129 if (GET_MODE (x) == SImode)
3131 /* The addc or subc patterns will eventually become one or two
3132 instructions. Below are some costs for some of the patterns
3133 which combine would reject because the costs of the individual
3134 insns in the patterns are lower.
3136 FIXME: It would be much easier if we had something like insn cost
3137 attributes and the cost calculation machinery used those attributes
3138 in the first place. This would eliminate redundant recog-like C
3139 code to calculate costs of complex patterns. */
3140 rtx op0 = XEXP (x, 0);
3141 rtx op1 = XEXP (x, 1);
3143 if (GET_CODE (x) == PLUS)
3145 if (GET_CODE (op0) == AND
3146 && XEXP (op0, 1) == const1_rtx
3147 && (GET_CODE (op1) == PLUS
3148 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3149 return 1;
3151 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3152 && GET_CODE (op1) == LSHIFTRT
3153 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3154 return 1;
3156 /* Let's assume that adding the result of an insn that stores into
3157 the T bit is cheap. */
3158 if (treg_set_expr (op1, SImode))
3159 return 1;
3160 if (treg_set_expr (op0, SImode))
3161 return 1;
3164 /* On SH1-4 we have only max. SImode operations.
3165 Double the cost for modes > SImode. */
3166 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3168 /* Adding a register is a single cycle insn. */
3169 if (REG_P (XEXP (x, 1))
3170 || GET_CODE (XEXP (x, 1)) == SUBREG)
3171 return 1 * cost_scale;
3173 /* Likewise for small constants. */
3174 if (CONST_INT_P (XEXP (x, 1))
3175 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3176 return 1 * cost_scale;
3178 /* Any other constant requires a 2 cycle pc-relative load plus an
3179 addition. */
3180 return 3 * cost_scale;
3183 /* Return the cost of a multiply. */
3184 static inline int
3185 multcosts (rtx x ATTRIBUTE_UNUSED)
3187 if (sh_multcost >= 0)
3188 return sh_multcost;
3190 if (TARGET_SH2)
3192 /* We have a mul insn, so we can never take more than the mul and the
3193 read of the mac reg, but count more because of the latency and extra
3194 reg usage. */
3195 if (optimize_size)
3196 return 2;
3197 return 3;
3200 /* If we're aiming at small code, then just count the number of
3201 insns in a multiply call sequence. */
3202 if (optimize_size)
3203 return 5;
3205 /* Otherwise count all the insns in the routine we'd be calling too. */
3206 return 20;
3209 /* Compute a (partial) cost for rtx X. Return true if the complete
3210 cost has been computed, and false if subexpressions should be
3211 scanned. In either case, *TOTAL contains the cost result. */
3212 static bool
3213 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3214 int opno ATTRIBUTE_UNUSED,
3215 int *total, bool speed ATTRIBUTE_UNUSED)
3217 int code = GET_CODE (x);
3219 switch (code)
3221 /* The lower-subreg pass decides whether to split multi-word regs
3222 into individual regs by looking at the cost for a SET of certain
3223 modes with the following patterns:
3224 (set (reg) (reg))
3225 (set (reg) (const_int 0))
3226 On machines that support vector-move operations a multi-word move
3227 is the same cost as individual reg move. On SH there is no
3228 vector-move, so we have to provide the correct cost in the number
3229 of move insns to load/store the reg of the mode in question. */
3230 case SET:
3231 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3233 *total = COSTS_N_INSNS (1);
3234 return true;
3237 if (register_operand (SET_DEST (x), VOIDmode)
3238 && (register_operand (SET_SRC (x), VOIDmode)
3239 || satisfies_constraint_Z (SET_SRC (x))))
3241 const machine_mode mode = GET_MODE (SET_DEST (x));
3242 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3243 / mov_insn_size (mode, TARGET_SH2A));
3244 return true;
3246 return false;
3248 /* The cost of a mem access is mainly the cost of the address mode. */
3249 case MEM:
3250 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3251 true);
3252 return true;
3254 case IF_THEN_ELSE:
3255 /* This case is required for the if_then_else negc pattern. */
3256 if (treg_set_expr (XEXP (x, 0), SImode))
3258 *total = COSTS_N_INSNS (1);
3259 return true;
3261 else
3262 return false;
3264 /* Zero extracts of single bits are usually combine patterns for the
3265 tst insns. */
3266 case ZERO_EXTRACT:
3267 if (GET_CODE (XEXP (x, 0)) == XOR
3268 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3269 && XEXP (x, 1) == const1_rtx
3270 && CONST_INT_P (XEXP (x, 2))
3271 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3272 /* Check that the xor constant overlaps with the extracted bit. */
3273 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3275 *total = 1; //COSTS_N_INSNS (1);
3276 return true;
3279 /* div0s variant. */
3280 if (GET_CODE (XEXP (x, 0)) == XOR
3281 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3282 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3284 *total = 1;
3285 return true;
3287 return false;
3289 /* The cost of a sign or zero extend depends on whether the source is a
3290 reg or a mem. In case of a mem take the address into account. */
3291 case SIGN_EXTEND:
3292 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3294 *total = COSTS_N_INSNS (1);
3295 return true;
3297 if (MEM_P (XEXP (x, 0)))
3299 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3300 GET_MODE (XEXP (x, 0)),
3301 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3302 return true;
3304 return false;
3306 case ZERO_EXTEND:
3307 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3309 *total = COSTS_N_INSNS (1);
3310 return true;
3312 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3313 && (GET_MODE (XEXP (x, 0)) == QImode
3314 || GET_MODE (XEXP (x, 0)) == HImode))
3316 /* Handle SH2A's movu.b and movu.w insn. */
3317 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3318 GET_MODE (XEXP (x, 0)),
3319 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3320 return true;
3322 return false;
3324 /* mems for SFmode and DFmode can be inside a parallel due to
3325 the way the fpscr is handled. */
3326 case PARALLEL:
3327 for (int i = 0; i < XVECLEN (x, 0); i++)
3329 rtx xx = XVECEXP (x, 0, i);
3330 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3332 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3333 GET_MODE (XEXP (xx, 0)),
3334 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3335 return true;
3337 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3339 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3340 GET_MODE (XEXP (xx, 1)),
3341 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3342 return true;
3346 if (sh_1el_vec (x, VOIDmode))
3347 *total = outer_code != SET;
3348 else if (sh_rep_vec (x, VOIDmode))
3349 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3350 + (outer_code != SET));
3351 else
3352 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3353 return true;
3355 case CONST_INT:
3356 if (CONST_OK_FOR_I08 (INTVAL (x)))
3357 *total = 0;
3358 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3359 && CONST_OK_FOR_K08 (INTVAL (x)))
3360 *total = 1;
3361 /* prepare_cmp_insn will force costly constants into registers before
3362 the cbranch[sd]i4 patterns can see them, so preserve potentially
3363 interesting ones not covered by I08 above. */
3364 else if (outer_code == COMPARE
3365 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3366 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3367 || INTVAL (x) == 0x7fffffff
3368 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3369 *total = 1;
3370 else
3371 *total = 8;
3372 return true;
3374 case EQ:
3375 /* An and with a constant compared against zero is
3376 most likely going to be a TST #imm, R0 instruction. */
3377 if (XEXP (x, 1) == const0_rtx
3378 && ((GET_CODE (XEXP (x, 0)) == AND
3379 || (SUBREG_P (XEXP (x, 0))
3380 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3381 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3383 *total = 1;
3384 return true;
3387 else if (XEXP (x, 1) == const0_rtx
3388 && GET_CODE (XEXP (x, 0)) == AND
3389 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3390 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3391 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3392 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3394 *total = 1;
3395 return true;
3397 else
3398 return false;
3400 case SMIN:
3401 case SMAX:
3402 /* This is most likely a clips.b or clips.w insn that is being made up
3403 by combine. */
3404 if (TARGET_SH2A
3405 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3406 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3407 && REG_P (XEXP (XEXP (x, 0), 0))
3408 && CONST_INT_P (XEXP (x, 1)))
3410 *total = COSTS_N_INSNS (1);
3411 return true;
3413 else
3414 return false;
3416 case CONST:
3417 case LABEL_REF:
3418 case SYMBOL_REF:
3419 *total = 5;
3420 return true;
3422 case CONST_DOUBLE:
3423 /* prepare_cmp_insn will force costly constants into registers before
3424 the cbranchdi4 pattern can see them, so preserve potentially
3425 interesting ones. */
3426 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3427 *total = 1;
3428 else
3429 *total = 10;
3430 return true;
3432 case CONST_VECTOR:
3433 /* FIXME: This looks broken. Only the last statement has any effect.
3434 Probably this could be folded with the PARALLEL case? */
3435 if (x == CONST0_RTX (GET_MODE (x)))
3436 *total = 0;
3437 else if (sh_1el_vec (x, VOIDmode))
3438 *total = outer_code != SET;
3439 if (sh_rep_vec (x, VOIDmode))
3440 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3441 + (outer_code != SET));
3442 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3443 return true;
3445 case PLUS:
3446 case MINUS:
3447 *total = COSTS_N_INSNS (addsubcosts (x));
3448 return true;
3450 case AND:
3451 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3452 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3454 *total = COSTS_N_INSNS (1);
3455 return true;
3457 /* Fall through. */
3459 case XOR:
3460 case IOR:
3461 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3462 return true;
3464 case MULT:
3465 *total = COSTS_N_INSNS (multcosts (x));
3466 return true;
3468 case LT:
3469 case GE:
3470 /* div0s sign comparison. */
3471 if (GET_CODE (XEXP (x, 0)) == XOR
3472 && REG_P ((XEXP (XEXP (x, 0), 0)))
3473 && REG_P ((XEXP (XEXP (x, 0), 1)))
3474 && satisfies_constraint_Z (XEXP (x, 1)))
3476 *total = COSTS_N_INSNS (1);
3477 return true;
3479 else
3480 return false;
3482 case LSHIFTRT:
3483 /* div0s sign comparison. */
3484 if (GET_CODE (XEXP (x, 0)) == XOR
3485 && REG_P ((XEXP (XEXP (x, 0), 0)))
3486 && REG_P ((XEXP (XEXP (x, 0), 1)))
3487 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3489 *total = COSTS_N_INSNS (1);
3490 return true;
3492 /* FALLTHRU */
3493 case ASHIFT:
3494 case ASHIFTRT:
3496 int cost = shiftcosts (x);
3497 if (cost < 0)
3498 return false;
3499 *total = COSTS_N_INSNS (cost);
3500 return true;
3503 case DIV:
3504 case UDIV:
3505 case MOD:
3506 case UMOD:
3507 *total = COSTS_N_INSNS (20);
3508 return true;
3510 case FLOAT:
3511 case FIX:
3512 *total = 100;
3513 return true;
3515 default:
3516 return false;
3520 /* Determine the size of the fundamental move insn that will be used
3521 for the specified mode. */
3522 static inline int
3523 mov_insn_size (machine_mode mode, bool consider_sh2a)
3525 const int mode_sz = GET_MODE_SIZE (mode);
3527 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3528 || (TARGET_FMOVD && mode == DFmode))
3529 return mode_sz;
3530 else
3532 /* The max. available mode for actual move insns is SImode.
3533 Larger accesses will be split into multiple loads/stores. */
3534 const int max_mov_sz = GET_MODE_SIZE (SImode);
3535 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3539 /* Determine the maximum possible displacement for a move insn for the
3540 specified mode. */
3542 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3544 /* The 4 byte displacement move insns are the same as the 2 byte
3545 versions but take a 12 bit displacement. All we need to do is to
3546 scale the max. displacement value accordingly. */
3547 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3549 /* SH2A supports FPU move insns with 12 bit displacements.
3550 Other variants do not support any kind of displacements for
3551 FPU move insns. */
3552 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3553 return 0;
3554 else
3556 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3557 const int mode_sz = GET_MODE_SIZE (mode);
3558 int r = 15 * mov_insn_sz * disp_scale;
3560 /* If the mov insn will be split into multiple loads/stores, the
3561 maximum possible displacement is a bit smaller. */
3562 if (mode_sz > mov_insn_sz)
3563 r -= mode_sz - mov_insn_sz;
3564 return r;
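/* Illustrative aside (worked arithmetic): for SImode the fundamental move
   is 4 bytes wide, so the limit above is 15 * 4 = 60 bytes; with the SH2A
   12-bit forms the scale factor 4095 / 15 = 273 raises it to 60 * 273 =
   16380.  For DImode, which is moved as two SImode accesses, the limit
   shrinks by mode_sz - mov_insn_sz = 8 - 4 to 56 bytes.  */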
3568 /* Determine the alignment mask for a move insn of the
3569 specified mode. */
3570 static inline int
3571 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3573 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3574 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3577 /* Return the displacement value of a displacement address. */
3578 HOST_WIDE_INT
3579 sh_disp_addr_displacement (rtx x)
3581 gcc_assert (satisfies_constraint_Sdd (x));
3582 return INTVAL (XEXP (XEXP (x, 0), 1));
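/* E.g. for (mem:SI (plus:SI (reg:SI 1) (const_int 8))) this returns 8; the
   satisfies_constraint_Sdd check above is what guarantees the
   reg + displacement shape that is taken apart here.  */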
3585 /* Compute the cost of an address. */
3586 static int
3587 sh_address_cost (rtx x, machine_mode mode,
3588 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3590 /* 'GBR + 0'. Account one more because of R0 restriction. */
3591 if (REG_P (x) && REGNO (x) == GBR_REG)
3592 return 2;
3594 /* Simple reg, post-inc, pre-dec addressing. */
3595 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3596 return 1;
3598 /* 'reg + disp' addressing. */
3599 if (GET_CODE (x) == PLUS
3600 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3602 /* 'GBR + disp'. Account one more because of R0 restriction. */
3603 if (REGNO (XEXP (x, 0)) == GBR_REG
3604 && gbr_displacement (XEXP (x, 1), mode))
3605 return 2;
3607 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3609 if (offset == 0)
3610 return 1;
3612 /* The displacement would fit into a 2 byte move insn.
3613 HImode and QImode loads/stores with displacement put pressure on
3614 R0 which will most likely require another reg copy. Thus account
3615 a higher cost for that. */
3616 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3617 return (mode == HImode || mode == QImode) ? 2 : 1;
3619 /* The displacement would fit into a 4 byte move insn (SH2A). */
3620 if (TARGET_SH2A
3621 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3622 return 2;
3624 /* The displacement is probably out of range and will require extra
3625 calculations. */
3626 return 3;
3629 /* 'reg + reg' addressing. Account a slightly higher cost because of
3630 increased pressure on R0. */
3631 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3632 return 3;
3634 /* Not sure what it is - probably expensive. */
3635 return 10;
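/* Summarizing the cases above with a few examples: @(8,Rn) in SImode costs 1,
   @(2,Rn) in HImode costs 2 because of the likely extra R0 copy, reg + reg
   indexed addressing costs 3, and anything unrecognized is assumed to be
   expensive (10).  */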
3638 /* Code to expand a shift. */
3639 static void
3640 gen_ashift (int type, int n, rtx reg)
3642 rtx n_rtx;
3644 /* Negative values here come from the shift_amounts array. */
3645 if (n < 0)
3647 if (type == ASHIFT)
3648 type = LSHIFTRT;
3649 else
3650 type = ASHIFT;
3651 n = -n;
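/* E.g. a request for an ASHIFT by -2 is emitted below as a logical right
   shift by 2, and vice versa.  */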
3654 n_rtx = GEN_INT (n);
3655 gcc_assert (satisfies_constraint_P27 (n_rtx));
3657 switch (type)
3659 case ASHIFTRT:
3660 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3661 break;
3662 case LSHIFTRT:
3663 if (n == 1)
3664 emit_insn (gen_shlr (reg, reg));
3665 else
3666 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3667 break;
3668 case ASHIFT:
3669 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3670 break;
3671 default:
3672 gcc_unreachable ();
3676 /* Code to expand a HImode shift. */
3677 static void
3678 gen_ashift_hi (int type, int n, rtx reg)
3680 /* Negative values here come from the shift_amounts array. */
3681 if (n < 0)
3683 if (type == ASHIFT)
3684 type = LSHIFTRT;
3685 else
3686 type = ASHIFT;
3687 n = -n;
3690 switch (type)
3692 case ASHIFTRT:
3693 case LSHIFTRT:
3694 /* We don't have HImode right shift operations because using the
3695 ordinary 32 bit shift instructions for that doesn't generate proper
3696 zero/sign extension.
3697 gen_ashift_hi is only called in contexts where we know that the
3698 sign extension works out correctly. */
3700 int offset = 0;
3701 if (GET_CODE (reg) == SUBREG)
3703 offset = SUBREG_BYTE (reg);
3704 reg = SUBREG_REG (reg);
3706 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3707 break;
3709 case ASHIFT:
3710 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3711 break;
3715 /* Output RTL to split a constant shift into its component SH constant
3716 shift instructions. */
3717 void
3718 gen_shifty_op (int code, rtx *operands)
3720 int value = INTVAL (operands[2]);
3721 int max, i;
3723 /* Truncate the shift count in case it is out of bounds. */
3724 value = value & 31;
3726 if (value == 31)
3728 if (code == LSHIFTRT)
3730 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3731 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3732 return;
3734 else if (code == ASHIFT)
3736 /* There is a two instruction sequence for 31 bit left shifts,
3737 but it requires r0. */
3738 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3740 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3741 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3742 return;
3746 else if (value == 0)
3748 /* This can happen even when optimizing, if there were subregs before
3749 reload. Don't output a nop here, as this is never optimized away;
3750 use a no-op move instead. */
3751 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3752 return;
3755 max = ashl_lshr_seq[value].insn_count;
3756 for (i = 0; i < max; i++)
3757 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3760 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3761 don't matter. */
3762 void
3763 gen_shifty_hi_op (int code, rtx *operands)
3765 int value = INTVAL (operands[2]);
3766 int max, i;
3767 void (*gen_fun) (int, int, rtx);
3769 /* This operation is used by and_shl for SImode values with a few
3770 high bits known to be cleared. */
3771 value &= 31;
3772 if (value == 0)
3774 emit_insn (gen_nop ());
3775 return;
3778 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3779 if (code == ASHIFT)
3781 max = ext_ashl_lshr_seq[value].insn_count;
3782 for (i = 0; i < max; i++)
3783 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3785 else
3786 /* When shifting right, emit the shifts in reverse order, so that
3787 solitary negative values come first. */
3788 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3789 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3792 /* Output RTL for an arithmetic right shift.
3793 ??? Rewrite to use super-optimizer sequences. */
3794 bool
3795 expand_ashiftrt (rtx *operands)
3797 rtx wrk;
3798 char func[18];
3799 int value;
3801 if (TARGET_DYNSHIFT)
3803 if (!CONST_INT_P (operands[2]))
3805 rtx count = copy_to_mode_reg (SImode, operands[2]);
3806 emit_insn (gen_negsi2 (count, count));
3807 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3808 return true;
3810 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3811 > 1 + SH_DYNAMIC_SHIFT_COST)
3813 rtx count
3814 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3815 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3816 return true;
3819 if (!CONST_INT_P (operands[2]))
3820 return false;
3822 value = INTVAL (operands[2]) & 31;
3824 if (value == 31)
3826 /* If we are called from abs expansion, arrange things so that we
3827 can use a single MT instruction that doesn't clobber the source,
3828 if LICM can hoist out the load of the constant zero. */
3829 if (currently_expanding_to_rtl)
3831 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3832 operands[1]));
3833 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3834 return true;
3836 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3837 return true;
3839 else if (value >= 16 && value <= 19)
3841 wrk = gen_reg_rtx (SImode);
3842 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3843 value -= 16;
3844 while (value--)
3845 gen_ashift (ASHIFTRT, 1, wrk);
3846 emit_move_insn (operands[0], wrk);
3847 return true;
3849 /* Expand a short sequence inline; for longer ones call a magic routine. */
3850 else if (value <= 5)
3852 wrk = gen_reg_rtx (SImode);
3853 emit_move_insn (wrk, operands[1]);
3854 while (value--)
3855 gen_ashift (ASHIFTRT, 1, wrk);
3856 emit_move_insn (operands[0], wrk);
3857 return true;
3860 wrk = gen_reg_rtx (Pmode);
3862 /* Load the value into an arg reg and call a helper. */
3863 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3864 sprintf (func, "__ashiftrt_r4_%d", value);
3865 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3866 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3867 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3868 return true;
3871 /* Try to find a good way to implement the combiner pattern
3872 [(set (match_operand:SI 0 "register_operand" "r")
3873 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3874 (match_operand:SI 2 "const_int_operand" "n"))
3875 (match_operand:SI 3 "const_int_operand" "n"))) .
3876 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3877 return 0 for simple right / left or left/right shift combination.
3878 return 1 for a combination of shifts with zero_extend.
3879 return 2 for a combination of shifts with an AND that needs r0.
3880 return 3 for a combination of shifts with an AND that needs an extra
3881 scratch register, when the three highmost bits of the AND mask are clear.
3882 return 4 for a combination of shifts with an AND that needs an extra
3883 scratch register, when any of the three highmost bits of the AND mask
3884 is set.
3885 If ATTRP is set, store an initial right shift width in ATTRP[0],
3886 and the instruction length in ATTRP[1] . These values are not valid
3887 when returning 0.
3888 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3889 shift_amounts for the last shift value that is to be used before the
3890 sign extend. */
3892 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3894 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3895 int left = INTVAL (left_rtx), right;
3896 int best = 0;
3897 int cost, best_cost = 10000;
3898 int best_right = 0, best_len = 0;
3899 int i;
3900 int can_ext;
3902 if (left < 0 || left > 31)
3903 return 0;
3904 if (CONST_INT_P (mask_rtx))
3905 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3906 else
3907 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3908 /* Can this be expressed as a right shift / left shift pair? */
3909 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3910 right = exact_log2 (lsb);
3911 mask2 = ~(mask + lsb - 1);
3912 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
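/* The classic bit trick above isolates the lowest set bit: for mask = 0x18,
   mask ^ (mask - 1) is 0x0f, so lsb becomes 0x08; lsb2 does the same for
   mask2.  */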
3913 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3914 if (! mask2)
3915 best_cost = ashl_lshr_seq[right].insn_count
3916 + ashl_lshr_seq[right + left].insn_count;
3917 /* mask has no trailing zeroes <==> ! right */
3918 else if (! right && mask2 == ~(lsb2 - 1))
3920 int late_right = exact_log2 (lsb2);
3921 best_cost = ashl_lshr_seq[left + late_right].insn_count
3922 + ashl_lshr_seq[late_right].insn_count;
3924 /* Try to use zero extend. */
3925 if (mask2 == ~(lsb2 - 1))
3927 int width, first;
3929 for (width = 8; width <= 16; width += 8)
3931 /* Can we zero-extend right away? */
3932 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3934 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3935 + ext_ashl_lshr_seq[left + right].insn_count;
3936 if (cost < best_cost)
3938 best = 1;
3939 best_cost = cost;
3940 best_right = right;
3941 best_len = cost;
3942 if (attrp)
3943 attrp[2] = -1;
3945 continue;
3947 /* ??? Could try to put zero extend into initial right shift,
3948 or even shift a bit left before the right shift. */
3949 /* Determine value of first part of left shift, to get to the
3950 zero extend cut-off point. */
3951 first = width - exact_log2 (lsb2) + right;
3952 if (first >= 0 && right + left - first >= 0)
3954 cost = ext_ashl_lshr_seq[right].insn_count
3955 + ext_ashl_lshr_seq[first].insn_count + 1
3956 + ext_ashl_lshr_seq[right + left - first].insn_count;
3958 if (cost < best_cost)
3960 best = 1;
3961 best_cost = cost;
3962 best_right = right;
3963 best_len = cost;
3964 if (attrp)
3965 attrp[2] = first;
3970 /* Try to use r0 AND pattern */
3971 for (i = 0; i <= 2; i++)
3973 if (i > right)
3974 break;
3975 if (! CONST_OK_FOR_K08 (mask >> i))
3976 continue;
3977 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
3978 if (cost < best_cost)
3980 best = 2;
3981 best_cost = cost;
3982 best_right = i;
3983 best_len = cost - 1;
3986 /* Try to use a scratch register to hold the AND operand. */
3987 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3988 for (i = 0; i <= 2; i++)
3990 if (i > right)
3991 break;
3992 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3993 + (can_ext
3994 ? ext_ashl_lshr_seq
3995 : ashl_lshr_seq)[left + i].insn_count;
3996 if (cost < best_cost)
3998 best = 4 - can_ext;
3999 best_cost = cost;
4000 best_right = i;
4001 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4005 if (attrp)
4007 attrp[0] = best_right;
4008 attrp[1] = best_len;
4010 return best;
4013 /* This is used in length attributes of the unnamed instructions
4014 corresponding to shl_and_kind return values of 1 and 2. */
4016 shl_and_length (rtx insn)
4018 rtx set_src, left_rtx, mask_rtx;
4019 int attributes[3];
4021 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4022 left_rtx = XEXP (XEXP (set_src, 0), 1);
4023 mask_rtx = XEXP (set_src, 1);
4024 shl_and_kind (left_rtx, mask_rtx, attributes);
4025 return attributes[1];
4028 /* This is used in length attribute of the and_shl_scratch instruction. */
4030 shl_and_scr_length (rtx insn)
4032 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4033 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4034 rtx op = XEXP (set_src, 0);
4035 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4036 op = XEXP (XEXP (op, 0), 0);
4037 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4040 /* Generate rtl for instructions for which shl_and_kind advised a particular
4041 method of generating them, i.e. returned zero. */
4042 bool
4043 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4045 int attributes[3];
4046 unsigned HOST_WIDE_INT mask;
4047 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4048 int right, total_shift;
4049 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4051 right = attributes[0];
4052 total_shift = INTVAL (left_rtx) + right;
4053 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4054 switch (kind)
4056 default:
4057 return true;
4058 case 1:
4060 int first = attributes[2];
4061 rtx operands[3];
4063 if (first < 0)
4065 emit_insn ((mask << right) <= 0xff
4066 ? gen_zero_extendqisi2 (dest,
4067 gen_lowpart (QImode, source))
4068 : gen_zero_extendhisi2 (dest,
4069 gen_lowpart (HImode, source)));
4070 source = dest;
4072 if (source != dest)
4073 emit_insn (gen_movsi (dest, source));
4074 operands[0] = dest;
4075 if (right)
4077 operands[2] = GEN_INT (right);
4078 gen_shifty_hi_op (LSHIFTRT, operands);
4080 if (first > 0)
4082 operands[2] = GEN_INT (first);
4083 gen_shifty_hi_op (ASHIFT, operands);
4084 total_shift -= first;
4085 mask <<= first;
4087 if (first >= 0)
4088 emit_insn (mask <= 0xff
4089 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4090 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4091 if (total_shift > 0)
4093 operands[2] = GEN_INT (total_shift);
4094 gen_shifty_hi_op (ASHIFT, operands);
4096 break;
4098 case 4:
4099 shift_gen_fun = gen_shifty_op;
4100 /* FALLTHRU */
4101 case 3:
4102 /* If the topmost bit that matters is set, set the topmost bits
4103 that don't matter. This way, we might be able to get a shorter
4104 signed constant. */
4105 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4106 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
4107 /* FALLTHRU */
4108 case 2:
4109 /* Don't expand fine-grained when combining, because that will
4110 make the pattern fail. */
4111 if (currently_expanding_to_rtl
4112 || reload_in_progress || reload_completed)
4114 rtx operands[3];
4116 /* Cases 3 and 4 should be handled by this split
4117 only while combining */
4118 gcc_assert (kind <= 2);
4119 if (right)
4121 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4122 source = dest;
4124 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4125 if (total_shift)
4127 operands[0] = dest;
4128 operands[1] = dest;
4129 operands[2] = GEN_INT (total_shift);
4130 shift_gen_fun (ASHIFT, operands);
4132 break;
4134 else
4136 int neg = 0;
4137 if (kind != 4 && total_shift < 16)
4139 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4140 if (neg > 0)
4141 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4142 else
4143 neg = 0;
4145 emit_insn (gen_and_shl_scratch (dest, source,
4146 GEN_INT (right),
4147 GEN_INT (mask),
4148 GEN_INT (total_shift + neg),
4149 GEN_INT (neg)));
4150 emit_insn (gen_movsi (dest, dest));
4151 break;
4154 return false;
4157 /* Try to find a good way to implement the combiner pattern
4158 [(set (match_operand:SI 0 "register_operand" "=r")
4159 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4160 (match_operand:SI 2 "const_int_operand" "n")
4161 (match_operand:SI 3 "const_int_operand" "n")
4162 (const_int 0)))
4163 (clobber (reg:SI T_REG))]
4164 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4165 return 0 for simple left / right shift combination.
4166 return 1 for left shift / 8 bit sign extend / left shift.
4167 return 2 for left shift / 16 bit sign extend / left shift.
4168 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4169 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4170 return 5 for left shift / 16 bit sign extend / right shift
4171 return 6 for < 8 bit sign extend / left shift.
4172 return 7 for < 8 bit sign extend / left shift / single right shift.
4173 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4175 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4177 int left, size, insize, ext;
4178 int cost = 0, best_cost;
4179 int kind;
4181 left = INTVAL (left_rtx);
4182 size = INTVAL (size_rtx);
4183 insize = size - left;
4184 gcc_assert (insize > 0);
4185 /* Default to left / right shift. */
4186 kind = 0;
4187 best_cost = ashl_lshr_seq[32 - insize].insn_count
4188 + ashl_lshr_seq[32 - size].insn_count;
4189 if (size <= 16)
4191 /* 16 bit shift / sign extend / 16 bit shift */
4192 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4193 + ashl_lshr_seq[16 - size].insn_count;
4194 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4195 below, by alternative 3 or something even better. */
4196 if (cost < best_cost)
4198 kind = 5;
4199 best_cost = cost;
4202 /* Try a plain sign extend between two shifts. */
4203 for (ext = 16; ext >= insize; ext -= 8)
4205 if (ext <= size)
4207 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4208 + ashl_lshr_seq[size - ext].insn_count;
4209 if (cost < best_cost)
4211 kind = ext / (unsigned) 8;
4212 best_cost = cost;
4215 /* Check if we can do a sloppy shift with a final signed shift
4216 restoring the sign. */
4217 if (EXT_SHIFT_SIGNED (size - ext))
4218 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4219 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4220 /* If not, maybe it's still cheaper to do the second shift sloppy,
4221 and do a final sign extend? */
4222 else if (size <= 16)
4223 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4224 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4225 + 1;
4226 else
4227 continue;
4228 if (cost < best_cost)
4230 kind = ext / (unsigned) 8 + 2;
4231 best_cost = cost;
4234 /* Check if we can sign extend in r0 */
4235 if (insize < 8)
4237 cost = 3 + ashl_lshr_seq[left].insn_count;
4238 if (cost < best_cost)
4240 kind = 6;
4241 best_cost = cost;
4243 /* Try the same with a final signed shift. */
4244 if (left < 31)
4246 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4247 if (cost < best_cost)
4249 kind = 7;
4250 best_cost = cost;
4254 if (TARGET_DYNSHIFT)
4256 /* Try to use a dynamic shift. */
4257 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4258 if (cost < best_cost)
4260 kind = 0;
4261 best_cost = cost;
4264 if (costp)
4265 *costp = cost;
4266 return kind;
4269 /* Function to be used in the length attribute of the instructions
4270 implementing this pattern. */
4272 shl_sext_length (rtx insn)
4274 rtx set_src, left_rtx, size_rtx;
4275 int cost;
4277 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4278 left_rtx = XEXP (XEXP (set_src, 0), 1);
4279 size_rtx = XEXP (set_src, 1);
4280 shl_sext_kind (left_rtx, size_rtx, &cost);
4281 return cost;
4284 /* Generate rtl for this pattern */
4285 bool
4286 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4288 int kind;
4289 int left, size, insize, cost;
4290 rtx operands[3];
4292 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4293 left = INTVAL (left_rtx);
4294 size = INTVAL (size_rtx);
4295 insize = size - left;
4296 switch (kind)
4298 case 1:
4299 case 2:
4300 case 3:
4301 case 4:
4303 int ext = kind & 1 ? 8 : 16;
4304 int shift2 = size - ext;
4306 /* Don't expand fine-grained when combining, because that will
4307 make the pattern fail. */
4308 if (! currently_expanding_to_rtl
4309 && ! reload_in_progress && ! reload_completed)
4311 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4312 emit_insn (gen_movsi (dest, source));
4313 break;
4315 if (dest != source)
4316 emit_insn (gen_movsi (dest, source));
4317 operands[0] = dest;
4318 if (ext - insize)
4320 operands[2] = GEN_INT (ext - insize);
4321 gen_shifty_hi_op (ASHIFT, operands);
4323 emit_insn (kind & 1
4324 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4325 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4326 if (kind <= 2)
4328 if (shift2)
4330 operands[2] = GEN_INT (shift2);
4331 gen_shifty_op (ASHIFT, operands);
4334 else
4336 if (shift2 > 0)
4338 if (EXT_SHIFT_SIGNED (shift2))
4340 operands[2] = GEN_INT (shift2 + 1);
4341 gen_shifty_op (ASHIFT, operands);
4342 operands[2] = const1_rtx;
4343 gen_shifty_op (ASHIFTRT, operands);
4344 break;
4346 operands[2] = GEN_INT (shift2);
4347 gen_shifty_hi_op (ASHIFT, operands);
4349 else if (shift2)
4351 operands[2] = GEN_INT (-shift2);
4352 gen_shifty_hi_op (LSHIFTRT, operands);
4354 emit_insn (size <= 8
4355 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4356 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4358 break;
4360 case 5:
4362 int i = 16 - size;
4363 if (! currently_expanding_to_rtl
4364 && ! reload_in_progress && ! reload_completed)
4365 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4366 else
4368 operands[0] = dest;
4369 operands[2] = GEN_INT (16 - insize);
4370 gen_shifty_hi_op (ASHIFT, operands);
4371 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4373 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4374 while (--i >= 0)
4375 gen_ashift (ASHIFTRT, 1, dest);
4376 break;
4378 case 6:
4379 case 7:
4380 /* Don't expand fine-grained when combining, because that will
4381 make the pattern fail. */
4382 if (! currently_expanding_to_rtl
4383 && ! reload_in_progress && ! reload_completed)
4385 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4386 emit_insn (gen_movsi (dest, source));
4387 break;
4389 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4390 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4391 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
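/* The and/xor/add sequence above sign-extends an INSIZE-bit field in place:
   e.g. for insize == 3 the bits 0b101 become (5 ^ 4) - 4 = -3, which is the
   correct 3-bit signed interpretation.  */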
4392 operands[0] = dest;
4393 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4394 gen_shifty_op (ASHIFT, operands);
4395 if (kind == 7)
4396 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4397 break;
4398 default:
4399 return true;
4401 return false;
4404 typedef struct label_ref_list_d
4406 rtx_code_label *label;
4407 struct label_ref_list_d *next;
4408 } *label_ref_list_t;
4410 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4411 ("label references list");
4413 /* The SH cannot load a large constant into a register; constants have to
4414 come from a pc relative load. The reference of a pc relative load
4415 instruction must be less than 1k in front of the instruction. This
4416 means that we often have to dump a constant inside a function, and
4417 generate code to branch around it.
4419 It is important to minimize this, since the branches will slow things
4420 down and make things bigger.
4422 Worst case code looks like:
4424 mov.l L1,rn
4425 bra L2
4427 align
4428 L1: .long value
4432 mov.l L3,rn
4433 bra L4
4435 align
4436 L3: .long value
4440 We fix this by performing a scan before scheduling, which notices which
4441 instructions need to have their operands fetched from the constant table
4442 and builds the table.
4444 The algorithm is:
4446 scan, find an instruction which needs a pcrel move. Look forward, find the
4447 last barrier which is within MAX_COUNT bytes of the requirement.
4448 If there isn't one, make one. Process all the instructions between
4449 the find and the barrier.
4451 In the above example, we can tell that L3 is within 1k of L1, so
4452 the first move can be shrunk from the 3 insn+constant sequence into
4453 just 1 insn, and the constant moved to L3 to make:
4455 mov.l L1,rn
4457 mov.l L3,rn
4458 bra L4
4460 align
4461 L3:.long value
4462 L4:.long value
4464 Then the second move becomes the target for the shortening process. */
4466 typedef struct
4468 rtx value; /* Value in table. */
4469 rtx_code_label *label; /* Label of value. */
4470 label_ref_list_t wend; /* End of window. */
4471 machine_mode mode; /* Mode of value. */
4473 /* True if this constant is accessed as part of a post-increment
4474 sequence. Note that HImode constants are never accessed in this way. */
4475 bool part_of_sequence_p;
4476 } pool_node;
4478 /* The maximum number of constants that can fit into one pool, since
4479 constants in the range 0..510 are at least 2 bytes long, and in the
4480 range from there to 1018 at least 4 bytes. */
4482 #define MAX_POOL_SIZE 372
4483 static pool_node pool_vector[MAX_POOL_SIZE];
4484 static int pool_size;
4485 static rtx_code_label *pool_window_label;
4486 static int pool_window_last;
4488 static int max_labelno_before_reorg;
4490 /* ??? If we need a constant in HImode which is the truncated value of a
4491 constant we need in SImode, we could combine the two entries thus saving
4492 two bytes. Is this common enough to be worth the effort of implementing
4493 it? */
4495 /* ??? This stuff should be done at the same time that we shorten branches.
4496 As it is now, we must assume that all branches are the maximum size, and
4497 this causes us to almost always output constant pools sooner than
4498 necessary. */
4500 /* Add a constant to the pool and return its label. */
4501 static rtx_code_label *
4502 add_constant (rtx x, machine_mode mode, rtx last_value)
4504 rtx_code_label *lab, *new_rtx;
4505 label_ref_list_t ref, newref;
4507 /* First see if we've already got it. */
4508 for (int i = 0; i < pool_size; i++)
4510 if (x->code == pool_vector[i].value->code
4511 && mode == pool_vector[i].mode)
4513 if (x->code == CODE_LABEL)
4515 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4516 continue;
4518 if (rtx_equal_p (x, pool_vector[i].value))
4520 lab = new_rtx = 0;
4521 if (! last_value
4522 || ! i
4523 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4525 new_rtx = gen_label_rtx ();
4526 LABEL_REFS (new_rtx) = pool_vector[i].label;
4527 pool_vector[i].label = lab = new_rtx;
4529 if (lab && pool_window_label)
4531 newref = label_ref_list_d_pool.allocate ();
4532 newref->label = pool_window_label;
4533 ref = pool_vector[pool_window_last].wend;
4534 newref->next = ref;
4535 pool_vector[pool_window_last].wend = newref;
4537 if (new_rtx)
4538 pool_window_label = new_rtx;
4539 pool_window_last = i;
4540 return lab;
4545 /* Need a new one. */
4546 pool_vector[pool_size].value = x;
4547 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4549 lab = 0;
4550 pool_vector[pool_size - 1].part_of_sequence_p = true;
4552 else
4553 lab = gen_label_rtx ();
4554 pool_vector[pool_size].mode = mode;
4555 pool_vector[pool_size].label = lab;
4556 pool_vector[pool_size].wend = NULL;
4557 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4558 if (lab && pool_window_label)
4560 newref = label_ref_list_d_pool.allocate ();
4561 newref->label = pool_window_label;
4562 ref = pool_vector[pool_window_last].wend;
4563 newref->next = ref;
4564 pool_vector[pool_window_last].wend = newref;
4566 if (lab)
4567 pool_window_label = lab;
4568 pool_window_last = pool_size;
4569 pool_size++;
4570 return lab;
4573 /* Output the literal table. START, if nonzero, is the first instruction
4574 this table is needed for, and also indicates that there is at least one
4575 casesi_worker_2 instruction; we have to emit the operand3 labels from
4576 these insns at a 4-byte aligned position. BARRIER is the barrier
4577 after which we are to place the table. */
4578 static void
4579 dump_table (rtx_insn *start, rtx_insn *barrier)
4581 rtx_insn *scan = barrier;
4582 bool need_align = true;
4583 rtx lab;
4584 label_ref_list_t ref;
4585 bool have_df = false;
4587 /* Do two passes, first time dump out the HI sized constants. */
4589 for (int i = 0; i < pool_size; i++)
4591 pool_node *p = &pool_vector[i];
4593 if (p->mode == HImode)
4595 if (need_align)
4597 scan = emit_insn_after (gen_align_2 (), scan);
4598 need_align = false;
4600 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4601 scan = emit_label_after (lab, scan);
4602 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4603 scan);
4604 for (ref = p->wend; ref; ref = ref->next)
4606 lab = ref->label;
4607 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4610 else if (p->mode == DFmode)
4611 have_df = true;
4614 need_align = true;
4616 if (start)
4618 scan = emit_insn_after (gen_align_4 (), scan);
4619 need_align = false;
4620 for (; start != barrier; start = NEXT_INSN (start))
4621 if (NONJUMP_INSN_P (start)
4622 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4624 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4625 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4627 scan = emit_label_after (lab, scan);
4630 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4632 rtx_insn *align_insn = NULL;
4634 scan = emit_label_after (gen_label_rtx (), scan);
4635 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4636 need_align = false;
4638 for (int i = 0; i < pool_size; i++)
4640 pool_node *p = &pool_vector[i];
4642 switch (p->mode)
4644 case E_HImode:
4645 break;
4646 case E_SImode:
4647 case E_SFmode:
4648 if (align_insn && !p->part_of_sequence_p)
4650 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4651 emit_label_before (lab, align_insn);
4652 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4653 align_insn);
4654 for (ref = p->wend; ref; ref = ref->next)
4656 lab = ref->label;
4657 emit_insn_before (gen_consttable_window_end (lab),
4658 align_insn);
4660 delete_insn (align_insn);
4661 align_insn = NULL;
4662 continue;
4664 else
4666 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4667 scan = emit_label_after (lab, scan);
4668 scan = emit_insn_after (gen_consttable_4 (p->value,
4669 const0_rtx), scan);
4670 need_align = ! need_align;
4672 break;
4673 case E_DFmode:
4674 if (need_align)
4676 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4677 align_insn = scan;
4678 need_align = false;
4680 /* FALLTHRU */
4681 case E_DImode:
4682 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4683 scan = emit_label_after (lab, scan);
4684 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4685 scan);
4686 break;
4687 default:
4688 gcc_unreachable ();
4691 if (p->mode != HImode)
4693 for (ref = p->wend; ref; ref = ref->next)
4695 lab = ref->label;
4696 scan = emit_insn_after (gen_consttable_window_end (lab),
4697 scan);
4702 pool_size = 0;
4705 for (int i = 0; i < pool_size; i++)
4707 pool_node *p = &pool_vector[i];
4709 switch (p->mode)
4711 case E_HImode:
4712 break;
4713 case E_SImode:
4714 case E_SFmode:
4715 if (need_align)
4717 need_align = false;
4718 scan = emit_label_after (gen_label_rtx (), scan);
4719 scan = emit_insn_after (gen_align_4 (), scan);
4721 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4722 scan = emit_label_after (lab, scan);
4723 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4724 scan);
4725 break;
4726 case E_DFmode:
4727 case E_DImode:
4728 if (need_align)
4730 need_align = false;
4731 scan = emit_label_after (gen_label_rtx (), scan);
4732 scan = emit_insn_after (gen_align_4 (), scan);
4734 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4735 scan = emit_label_after (lab, scan);
4736 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4737 scan);
4738 break;
4739 default:
4740 gcc_unreachable ();
4743 if (p->mode != HImode)
4745 for (ref = p->wend; ref; ref = ref->next)
4747 lab = ref->label;
4748 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4753 scan = emit_insn_after (gen_consttable_end (), scan);
4754 scan = emit_barrier_after (scan);
4755 pool_size = 0;
4756 pool_window_label = NULL;
4757 pool_window_last = 0;
4760 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4762 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4764 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4765 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4766 need to fix it if the input value is CONST_OK_FOR_I08. */
4767 static bool
4768 broken_move (rtx_insn *insn)
4770 if (NONJUMP_INSN_P (insn))
4772 rtx pat = PATTERN (insn);
4773 if (GET_CODE (pat) == PARALLEL)
4774 pat = XVECEXP (pat, 0, 0);
4775 if (GET_CODE (pat) == SET
4776 /* We can load any 8-bit value if we don't care what the high
4777 order bits end up as. */
4778 && GET_MODE (SET_DEST (pat)) != QImode
4779 && (CONSTANT_P (SET_SRC (pat))
4780 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4781 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4782 /* Match mova_const. */
4783 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4784 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4785 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4786 && ! (TARGET_SH2E
4787 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4788 && (fp_zero_operand (SET_SRC (pat))
4789 || fp_one_operand (SET_SRC (pat)))
4790 /* In general we don't know the current setting of fpscr, so
4791 disable fldi.
4792 There is an exception if this was a register-register move
4793 before reload - and hence it was ascertained that we have
4794 single precision setting - and in a post-reload optimization
4795 we changed this to do a constant load. In that case
4796 we don't have an r0 clobber, hence we must use fldi. */
4797 && (TARGET_FMOVD
4798 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4799 == SCRATCH))
4800 && REG_P (SET_DEST (pat))
4801 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4802 && ! (TARGET_SH2A
4803 && GET_MODE (SET_DEST (pat)) == SImode
4804 && (satisfies_constraint_I20 (SET_SRC (pat))
4805 || satisfies_constraint_I28 (SET_SRC (pat))))
4806 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4807 return true;
4810 return false;
4813 /* Return true if the specified insn is a mova insn. */
4814 static bool
4815 mova_p (rtx_insn *insn)
4817 return (NONJUMP_INSN_P (insn)
4818 && GET_CODE (PATTERN (insn)) == SET
4819 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4820 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4821 /* Don't match mova_const. */
4822 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4825 /* Fix up a mova from a switch that went out of range. */
4826 static void
4827 fixup_mova (rtx_insn *mova)
4829 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4830 if (! flag_pic)
4832 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4833 INSN_CODE (mova) = -1;
4835 else
4837 rtx_insn *worker = mova;
4838 rtx_code_label *lab = gen_label_rtx ();
4839 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4843 worker = NEXT_INSN (worker);
4844 gcc_assert (worker
4845 && !LABEL_P (worker)
4846 && !JUMP_P (worker));
4847 } while (NOTE_P (worker)
4848 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4849 wpat = PATTERN (worker);
4850 wpat0 = XVECEXP (wpat, 0, 0);
4851 wpat1 = XVECEXP (wpat, 0, 1);
4852 wsrc = SET_SRC (wpat0);
4853 PATTERN (worker) = (gen_casesi_worker_2
4854 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4855 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4856 XEXP (wpat1, 0)));
4857 INSN_CODE (worker) = -1;
4858 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4859 base = gen_rtx_LABEL_REF (Pmode, lab);
4860 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4861 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4862 INSN_CODE (mova) = -1;
4866 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4867 *num_mova, and check if the new mova is not nested within the first one.
4868 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4869 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4870 static int
4871 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4873 int n_addr = 0; /* Initialization to shut up spurious warning. */
4874 int f_target, n_target = 0; /* Likewise. */
4876 if (optimize)
4878 /* If NEW_MOVA has no address yet, it will be handled later. */
4879 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4880 return -1;
4882 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4883 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4884 if (n_addr > n_target || n_addr + 1022 < n_target)
4886 /* Change the mova into a load.
4887 broken_move will then return true for it. */
4888 fixup_mova (new_mova);
4889 return 1;
4892 if (!(*num_mova)++)
4894 *first_mova = new_mova;
4895 return 2;
4897 if (!optimize
4898 || ((f_target
4899 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4900 >= n_target))
4901 return -1;
4903 (*num_mova)--;
4904 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4905 > n_target - n_addr)
4907 fixup_mova (*first_mova);
4908 return 0;
4910 else
4912 fixup_mova (new_mova);
4913 return 1;
4917 /* Find the last barrier from insn FROM which is close enough to hold the
4918 constant pool. If we can't find one, then create one near the end of
4919 the range. */
4920 static rtx_insn *
4921 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4923 int count_si = 0;
4924 int count_hi = 0;
4925 int found_hi = 0;
4926 int found_si = 0;
4927 int hi_align = 2;
4928 int si_align = 2;
4929 int leading_mova = num_mova;
4930 rtx_insn *barrier_before_mova = NULL;
4931 rtx_insn *found_barrier = NULL;
4932 rtx_insn *good_barrier = NULL;
4933 int si_limit;
4934 int hi_limit;
4935 rtx_insn *orig = from;
4936 rtx_insn *last_got = NULL;
4937 rtx_insn *last_symoff = NULL;
4939 /* For HImode: range is 510, add 4 because pc counts from address of
4940 second instruction after this one, subtract 2 for the jump instruction
4941 that we may need to emit before the table, subtract 2 for the instruction
4942 that fills the jump delay slot (in very rare cases, reorg will take an
4943 instruction from after the constant pool or will leave the delay slot
4944 empty). This gives 510.
4945 For SImode: range is 1020, add 4 because pc counts from address of
4946 second instruction after this one, subtract 2 in case pc is 2 byte
4947 aligned, subtract 2 for the jump instruction that we may need to emit
4948 before the table, subtract 2 for the instruction that fills the jump
4949 delay slot. This gives 1018. */
4951 /* The branch will always be shortened now that the reference address for
4952 forward branches is the successor address, thus we need no longer make
4953 adjustments to the [sh]i_limit for -O0. */
4955 si_limit = 1018;
4956 hi_limit = 510;
4958 while (from && count_si < si_limit && count_hi < hi_limit)
4960 int inc = get_attr_length (from);
4961 int new_align = 1;
4963 /* If this is a label that existed at the time of the compute_alignments
4964 call, determine the alignment. N.B. When find_barrier recurses for
4965 an out-of-reach mova, we might see labels at the start of previously
4966 inserted constant tables. */
4967 if (LABEL_P (from)
4968 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4970 if (optimize)
4971 new_align = 1 << label_to_alignment (from);
4972 else if (BARRIER_P (prev_nonnote_insn (from)))
4973 new_align = 1 << barrier_align (from);
4974 else
4975 new_align = 1;
4976 inc = 0;
4978 /* In case we are scanning a constant table because of recursion, check
4979 for explicit alignments. If the table is long, we might be forced
4980 to emit the new table in front of it; the length of the alignment
4981 might be the last straw. */
4982 else if (NONJUMP_INSN_P (from)
4983 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4984 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4985 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4986 /* When we find the end of a constant table, paste the new constant
4987 at the end. That is better than putting it in front because
4988 this way, we don't need extra alignment for adding a 4-byte-aligned
4989 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4990 else if (NONJUMP_INSN_P (from)
4991 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4992 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4993 return from;
4995 if (BARRIER_P (from))
4997 rtx_insn *next;
4999 found_barrier = from;
5001 /* If we are at the end of the function, or in front of an alignment
5002 instruction, we need not insert an extra alignment. We prefer
5003 this kind of barrier. */
5004 if (barrier_align (from) > 2)
5005 good_barrier = from;
5007 /* If we are at the end of a hot/cold block, dump the constants
5008 here. */
5009 next = NEXT_INSN (from);
5010 if (next
5011 && NOTE_P (next)
5012 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5013 break;
5016 if (broken_move (from))
5018 rtx pat, src, dst;
5019 machine_mode mode;
5021 pat = PATTERN (from);
5022 if (GET_CODE (pat) == PARALLEL)
5023 pat = XVECEXP (pat, 0, 0);
5024 src = SET_SRC (pat);
5025 dst = SET_DEST (pat);
5026 mode = GET_MODE (dst);
5028 /* GOT pc-relative setting comes in a pair of
5029 mova .L8,r0
5030 mov.l .L8,r12
5031 instructions. (plus add r0,r12).
5032 Remember if we see one without the other. */
5033 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5034 last_got = last_got ? NULL : from;
5035 else if (PIC_ADDR_P (src))
5036 last_got = last_got ? NULL : from;
5038 /* We must explicitly check the mode, because sometimes the
5039 front end will generate code to load unsigned constants into
5040 HImode targets without properly sign extending them. */
5041 if (mode == HImode
5042 || (mode == SImode && satisfies_constraint_I16 (src)
5043 && REGNO (dst) != FPUL_REG))
5045 found_hi += 2;
5046 /* We put the short constants before the long constants, so
5047 we must count the length of short constants in the range
5048 for the long constants. */
5049 /* ??? This isn't optimal, but is easy to do. */
5050 si_limit -= 2;
5052 else
5054 /* We dump DF/DI constants before SF/SI ones, because
5055 the limit is the same, but the alignment requirements
5056 are higher. We may waste up to 4 additional bytes
5057 for alignment, and the DF/DI constant may have
5058 another SF/SI constant placed before it. */
5059 while (si_align > 2 && found_si + si_align - 2 > count_si)
5060 si_align >>= 1;
5061 if (found_si > count_si)
5062 count_si = found_si;
5063 found_si += GET_MODE_SIZE (mode);
5064 if (num_mova)
5065 si_limit -= GET_MODE_SIZE (mode);
5069 if (mova_p (from))
5071 switch (untangle_mova (&num_mova, &mova, from))
5073 case 1:
5074 if (flag_pic)
5076 rtx src = SET_SRC (PATTERN (from));
5077 if (GET_CODE (src) == CONST
5078 && GET_CODE (XEXP (src, 0)) == UNSPEC
5079 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5080 last_symoff = from;
5082 break;
5083 case 0: return find_barrier (0, 0, mova);
5084 case 2:
5086 leading_mova = 0;
5087 barrier_before_mova
5088 = good_barrier ? good_barrier : found_barrier;
5090 default: break;
5092 if (found_si > count_si)
5093 count_si = found_si;
5095 else if (JUMP_TABLE_DATA_P (from)
5096 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5098 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5099 || (num_mova
5100 && (prev_nonnote_insn (from)
5101 == XEXP (MOVA_LABELREF (mova), 0))))
5102 num_mova--;
5103 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5105 /* We have just passed the barrier in front of the
5106 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5107 the ADDR_DIFF_VEC is accessed as data, just like our pool
5108 constants, this is a good opportunity to accommodate what
5109 we have gathered so far.
5110 If we waited any longer, we could end up at a barrier in
5111 front of code, which gives worse cache usage for separated
5112 instruction / data caches. */
5113 good_barrier = found_barrier;
5114 break;
5116 else
5118 rtx body = PATTERN (from);
5119 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5122 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5123 else if (JUMP_P (from)
5124 && ! TARGET_SH2
5125 && ! optimize_size)
5126 new_align = 4;
5128 /* There is a possibility that a bf is transformed into a bf/s by the
5129 delay slot scheduler. */
5130 if (JUMP_P (from)
5131 && get_attr_type (from) == TYPE_CBRANCH
5132 && ! sequence_insn_p (from))
5133 inc += 2;
5135 if (found_si)
5137 count_si += inc;
5138 if (new_align > si_align)
5140 si_limit -= (count_si - 1) & (new_align - si_align);
5141 si_align = new_align;
5143 count_si = (count_si + new_align - 1) & -new_align;
5145 if (found_hi)
5147 count_hi += inc;
5148 if (new_align > hi_align)
5150 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5151 hi_align = new_align;
5153 count_hi = (count_hi + new_align - 1) & -new_align;
5155 from = NEXT_INSN (from);
5158 if (num_mova)
5160 if (leading_mova)
5162 /* Try as we might, the leading mova is out of range. Change
5163 it into a load (which will become a pcload) and retry. */
5164 fixup_mova (mova);
5165 return find_barrier (0, 0, mova);
5167 else
5169 /* Insert the constant pool table before the mova instruction,
5170 to prevent the mova label reference from going out of range. */
5171 from = mova;
5172 good_barrier = found_barrier = barrier_before_mova;
5176 if (found_barrier)
5178 if (good_barrier && next_real_insn (found_barrier))
5179 found_barrier = good_barrier;
5181 else
5183 /* We didn't find a barrier in time to dump our stuff,
5184 so we'll make one. */
5185 rtx_code_label *label = gen_label_rtx ();
5187 /* Don't emit a constant table in the middle of insns for
5188 casesi_worker_2. This is a bit of overkill but is enough
5189 because casesi_worker_2 does not appear very frequently. */
5190 if (last_symoff)
5191 from = last_symoff;
5193 /* If we exceeded the range, then we must back up over the last
5194 instruction we looked at. Otherwise, we just need to undo the
5195 NEXT_INSN at the end of the loop. */
5196 if (PREV_INSN (from) != orig
5197 && (count_hi > hi_limit || count_si > si_limit))
5198 from = PREV_INSN (PREV_INSN (from));
5199 else
5200 from = PREV_INSN (from);
5202 /* Don't emit a constant table in the middle of global pointer setting,
5203 since that would move the addressing base GOT into another table.
5204 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5205 in the pool anyway, so just move up the whole constant pool.
5207 However, avoid doing so when the last single GOT mov is the starting
5208 insn itself. Going back past the start insn would create a negative
5209 offset, causing errors. */
5210 if (last_got && last_got != orig)
5211 from = PREV_INSN (last_got);
5213 /* Don't insert the constant pool table at the position which
5214 may be the landing pad. */
5215 if (flag_exceptions
5216 && CALL_P (from)
5217 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5218 from = PREV_INSN (from);
5220 /* Walk back to be just before any jump or label.
5221 Putting it before a label reduces the number of times the branch
5222 around the constant pool table will be hit. Putting it before
5223 a jump makes it more likely that the bra delay slot will be
5224 filled. */
5225 while (NOTE_P (from) || JUMP_P (from)
5226 || LABEL_P (from))
5227 from = PREV_INSN (from);
5229 /* Make sure we do not split between a call and its corresponding
5230 CALL_ARG_LOCATION note. */
5231 if (CALL_P (from))
5233 rtx_insn *next = NEXT_INSN (from);
5234 if (next && NOTE_P (next)
5235 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5236 from = next;
5239 from = emit_jump_insn_after (gen_jump (label), from);
5240 JUMP_LABEL (from) = label;
5241 LABEL_NUSES (label) = 1;
5242 found_barrier = emit_barrier_after (from);
5243 emit_label_after (label, found_barrier);
5246 return found_barrier;
5249 /* If the instruction INSN is implemented by a special function, and we can
5250 positively find the register that is used to call the sfunc, and this
5251 register is not used anywhere else in this instruction - except as the
5252 destination of a set, return this register; else, return 0. */
5254 sfunc_uses_reg (rtx_insn *insn)
5256 int i;
5257 rtx pattern, part, reg_part, reg;
5259 if (!NONJUMP_INSN_P (insn))
5260 return NULL_RTX;
5261 pattern = PATTERN (insn);
5262 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5263 return NULL_RTX;
5265 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5267 part = XVECEXP (pattern, 0, i);
5268 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5269 reg_part = part;
5271 if (! reg_part)
5272 return NULL_RTX;
5273 reg = XEXP (reg_part, 0);
5274 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5276 part = XVECEXP (pattern, 0, i);
5277 if (part == reg_part || GET_CODE (part) == CLOBBER)
5278 continue;
5279 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5280 && REG_P (SET_DEST (part)))
5281 ? SET_SRC (part) : part)))
5282 return NULL_RTX;
5284 return reg;
5287 /* See if the only way in which INSN uses REG is by calling it, or by
5288 setting it while calling it. Set *SET to a SET rtx if the register
5289 is set by INSN. */
5290 static bool
5291 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5293 *set = NULL_RTX;
5295 rtx reg2 = sfunc_uses_reg (insn);
5296 if (reg2 && REGNO (reg2) == REGNO (reg))
5298 rtx pattern = single_set (insn);
5299 if (pattern
5300 && REG_P (SET_DEST (pattern))
5301 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5302 *set = pattern;
5303 return false;
5305 if (!CALL_P (insn))
5307 /* We don't use rtx_equal_p because we don't care if the mode is
5308 different. */
5309 rtx pattern = single_set (insn);
5310 if (pattern
5311 && REG_P (SET_DEST (pattern))
5312 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5314 rtx par, part;
5315 int i;
5317 *set = pattern;
5318 par = PATTERN (insn);
5319 if (GET_CODE (par) == PARALLEL)
5320 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5322 part = XVECEXP (par, 0, i);
5323 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5324 return true;
5326 return reg_mentioned_p (reg, SET_SRC (pattern));
5329 return true;
5332 rtx pattern = PATTERN (insn);
5334 if (GET_CODE (pattern) == PARALLEL)
5336 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5337 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5338 return true;
5339 pattern = XVECEXP (pattern, 0, 0);
5342 if (GET_CODE (pattern) == SET)
5344 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5346 /* We don't use rtx_equal_p, because we don't care if the
5347 mode is different. */
5348 if (!REG_P (SET_DEST (pattern))
5349 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5350 return true;
5352 *set = pattern;
5355 pattern = SET_SRC (pattern);
5358 if (GET_CODE (pattern) != CALL
5359 || !MEM_P (XEXP (pattern, 0))
5360 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5361 return true;
5363 return false;
5366 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5367 general registers. Bits 0..15 mean that the respective registers
5368 are used as inputs in the instruction. Bits 16..31 mean that the
5369 registers 0..15, respectively, are used as outputs, or are clobbered.
5370 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
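/* For example, (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))) yields
   0x2000c: bit 17 for the written r1 plus bits 2 and 3 for the read r2 and r3
   (assuming SImode occupies a single hard register, as it does here).  */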
5372 regs_used (rtx x, int is_dest)
5374 enum rtx_code code;
5375 const char *fmt;
5376 int used = 0;
5378 if (! x)
5379 return used;
5380 code = GET_CODE (x);
5381 switch (code)
5383 case REG:
5384 if (REGNO (x) < 16)
5385 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5386 << (REGNO (x) + is_dest));
5387 return 0;
5388 case SUBREG:
5390 rtx y = SUBREG_REG (x);
5392 if (!REG_P (y))
5393 break;
5394 if (REGNO (y) < 16)
5395 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5396 << (REGNO (y) +
5397 subreg_regno_offset (REGNO (y),
5398 GET_MODE (y),
5399 SUBREG_BYTE (x),
5400 GET_MODE (x)) + is_dest));
5401 return 0;
5403 case SET:
5404 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5405 case RETURN:
5406 /* If there was a return value, it must have been indicated with USE. */
5407 return 0x00ffff00;
5408 case CLOBBER:
5409 is_dest = 1;
5410 break;
5411 case MEM:
5412 is_dest = 0;
5413 break;
5414 case CALL:
5415 used |= 0x00ff00f0;
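/* The mask above marks r4..r7 (the argument registers) as read and r0..r7 as
   clobbered by the call.  */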
5416 break;
5417 default:
5418 break;
5421 fmt = GET_RTX_FORMAT (code);
5423 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5425 if (fmt[i] == 'E')
5427 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5428 used |= regs_used (XVECEXP (x, i, j), is_dest);
5430 else if (fmt[i] == 'e')
5431 used |= regs_used (XEXP (x, i), is_dest);
5433 return used;
5436 /* Create an instruction that prevents redirection of a conditional branch
5437 to the destination of the JUMP with address ADDR.
5438 If the branch needs to be implemented as an indirect jump, try to find
5439 a scratch register for it.
5440 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5441 If any preceding insn that doesn't fit into a delay slot is good enough,
5442 pass 1. Pass 2 if a definite blocking insn is needed.
5443 -1 is used internally to avoid deep recursion.
5444 If a blocking instruction is made or recognized, return it. */
5445 static rtx_insn *
5446 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5448 int dead = 0;
5449 rtx_insn *prev = prev_nonnote_insn (jump);
5451 /* First, check if we already have an instruction that satisfies our need. */
5452 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5454 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5455 return prev;
5456 if (GET_CODE (PATTERN (prev)) == USE
5457 || GET_CODE (PATTERN (prev)) == CLOBBER
5458 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5459 prev = jump;
5460 else if ((need_block &= ~1) < 0)
5461 return prev;
5462 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5463 need_block = 0;
5465 if (GET_CODE (PATTERN (jump)) == RETURN)
5467 if (! need_block)
5468 return prev;
5469 /* Reorg even does nasty things with return insns that cause branches
5470 to go out of range - see find_end_label and callers. */
5471 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5473 /* We can't use JUMP_LABEL here because it might be undefined
5474 when not optimizing. */
5475 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5476 /* If the branch is out of range, try to find a scratch register for it. */
5477 if (optimize
5478 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5479 > 4092 + 4098))
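/* The unsigned comparison above is true when the target lies more than 4092
   bytes before or more than 4098 bytes after this insn, i.e. roughly outside
   the reach of a pc-relative bra.  */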
5481 rtx_insn *scan;
5482 /* Don't look for the stack pointer as a scratch register,
5483 it would cause trouble if an interrupt occurred. */
5484 unsigned attempt = 0x7fff, used;
5485 int jump_left = flag_expensive_optimizations + 1;
5487 /* It is likely that the most recent eligible instruction is wanted for
5488 the delay slot. Therefore, find out which registers it uses, and
5489 try to avoid using them. */
5491 for (scan = jump; (scan = PREV_INSN (scan)); )
5493 if (scan->deleted ())
5494 continue;
5495 rtx_code code = GET_CODE (scan);
5496 if (code == CODE_LABEL || code == JUMP_INSN)
5497 break;
5498 if (code == INSN
5499 && GET_CODE (PATTERN (scan)) != USE
5500 && GET_CODE (PATTERN (scan)) != CLOBBER
5501 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5503 attempt &= ~regs_used (PATTERN (scan), 0);
5504 break;
5507 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5508 (scan = NEXT_INSN (scan)); )
5510 if (scan->deleted ())
5511 continue;
5512 rtx_code code = GET_CODE (scan);
5513 if (INSN_P (scan))
5515 used |= regs_used (PATTERN (scan), 0);
5516 if (code == CALL_INSN)
5517 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5518 dead |= (used >> 16) & ~used;
5519 if (dead & attempt)
5521 dead &= attempt;
5522 break;
5524 if (code == JUMP_INSN)
5526 if (jump_left-- && simplejump_p (scan))
5527 scan = JUMP_LABEL_AS_INSN (scan);
5528 else
5529 break;
5533 /* Mask out the stack pointer again, in case it was
5534 the only 'free' register we have found. */
5535 dead &= 0x7fff;
5537 /* If the immediate destination is still in range, check for possible
5538 threading with a jump beyond the delay slot insn.
5539 Don't check if we are called recursively; the jump has been or will be
5540 checked in a different invocation then. */
5542 else if (optimize && need_block >= 0)
5544 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
5545 next = next_active_insn (next);
5546 if (next && JUMP_P (next)
5547 && GET_CODE (PATTERN (next)) == SET
5548 && recog_memoized (next) == CODE_FOR_jump_compact)
5550 dest = JUMP_LABEL (next);
5551 if (dest
5552 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5553 > 4092 + 4098))
5554 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5558 if (dead)
5560 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5562 /* It would be nice if we could convert the jump into an indirect
5563 jump / far branch right now, and thus exposing all constituent
5564 instructions to further optimization. However, reorg uses
5565 simplejump_p to determine if there is an unconditional jump where
5566 it should try to schedule instructions from the target of the
5567 branch; simplejump_p fails for indirect jumps even if they have
5568 a JUMP_LABEL. */
5569 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5570 (reg, GEN_INT (unspec_bbr_uid++)),
5571 jump);
5572 /* ??? We would like this to have the scope of the jump, but that
5573 scope will change when a delay slot insn of an inner scope is added.
5574 Hence, after delay slot scheduling, we'll have to expect
5575 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5576 the jump. */
5578 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5579 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5580 return insn;
5582 else if (need_block)
5583 /* We can't use JUMP_LABEL here because it might be undefined
5584 when not optimizing. */
5585 return emit_insn_before (gen_block_branch_redirect
5586 (GEN_INT (unspec_bbr_uid++)),
5587 jump);
5588 return prev;
5591 #define CONDJUMP_MIN -252
5592 #define CONDJUMP_MAX 262
5593 struct far_branch
5595 /* A label (to be placed) in front of the jump
5596 that jumps to our ultimate destination. */
5597 rtx_insn *near_label;
5598 /* Where we are going to insert it if we cannot move the jump any farther,
5599 or the jump itself if we have picked up an existing jump. */
5600 rtx_insn *insert_place;
5601 /* The ultimate destination. */
5602 rtx_insn *far_label;
5603 struct far_branch *prev;
5604 /* If the branch has already been created, its address;
5605 else the address of its first prospective user. */
5606 int address;
5609 enum mdep_reorg_phase_e mdep_reorg_phase;
5611 static void
5612 gen_far_branch (struct far_branch *bp)
5614 rtx_insn *insn = bp->insert_place;
5615 rtx_jump_insn *jump;
5616 rtx_code_label *label = gen_label_rtx ();
5618 emit_label_after (label, insn);
5619 if (bp->far_label)
5621 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5622 LABEL_NUSES (bp->far_label)++;
5624 else
5625 jump = emit_jump_insn_after (gen_return (), insn);
5627 /* Emit a barrier so that reorg knows that any following instructions
5628 are not reachable via a fall-through path.
5629 But don't do this when not optimizing, since we wouldn't suppress the
5630 alignment for the barrier then, and could end up with out-of-range
5631 pc-relative loads. */
5632 if (optimize)
5633 emit_barrier_after (jump);
5634 emit_label_after (bp->near_label, insn);
5636 if (bp->far_label)
5637 JUMP_LABEL (jump) = bp->far_label;
5638 else
5640 rtx pat = PATTERN (jump);
5641 gcc_assert (ANY_RETURN_P (pat));
5642 JUMP_LABEL (jump) = pat;
5645 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5646 gcc_assert (ok);
5648 /* If we are branching around a jump (rather than a return), prevent
5649 reorg from using an insn from the jump target as the delay slot insn -
5650 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5651 and it could cause branches to go out of range. */
5652 if (bp->far_label)
5653 (emit_insn_after
5654 (gen_stuff_delay_slot
5655 (GEN_INT (unspec_bbr_uid++),
5656 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5657 insn));
5658 /* Prevent reorg from undoing our splits. */
5659 gen_block_redirect (jump, bp->address += 2, 2);
5662 /* Fix up ADDR_DIFF_VECs. */
5663 void
5664 fixup_addr_diff_vecs (rtx_insn *first)
5666 rtx_insn *insn;
5668 for (insn = first; insn; insn = NEXT_INSN (insn))
5670 rtx vec_lab, pat, prevpat, x, braf_label;
5671 rtx_insn *prev;
5673 if (! JUMP_TABLE_DATA_P (insn)
5674 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5675 continue;
5676 pat = PATTERN (insn);
5677 vec_lab = XEXP (XEXP (pat, 0), 0);
5679 /* Search the matching casesi_jump_2. */
5680 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5682 if (!JUMP_P (prev))
5683 continue;
5684 prevpat = PATTERN (prev);
5685 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5686 continue;
5687 x = XVECEXP (prevpat, 0, 1);
5688 if (GET_CODE (x) != USE)
5689 continue;
5690 x = XEXP (x, 0);
5691 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5692 break;
5694 /* FIXME: This is a bug in the optimizer, but it seems harmless
5695 to just avoid panicking. */
5696 if (!prev)
5697 continue;
5699 /* Emit the reference label of the braf where it belongs, right after
5700 the casesi_jump_2 (i.e. braf). */
5701 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5702 emit_label_after (braf_label, prev);
5704 /* Fix up the ADDR_DIFF_VEC to be relative
5705 to the reference address of the braf. */
5706 XEXP (XEXP (pat, 0), 0) = braf_label;
5710 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5711 a barrier. Return the base 2 logarithm of the desired alignment. */
5712 int
5713 barrier_align (rtx_insn *barrier_or_label)
5715 if (! barrier_or_label)
5716 return 0;
5718 if (LABEL_P (barrier_or_label)
5719 && NEXT_INSN (barrier_or_label)
5720 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5721 return 2;
5723 if (BARRIER_P (barrier_or_label)
5724 && PREV_INSN (barrier_or_label)
5725 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5727 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5728 /* If this is a very small table, we want to keep the alignment after
5729 the table to the minimum for proper code alignment. */
5730 return ((optimize_size
5731 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5732 <= (unsigned) 1 << (CACHE_LOG - 2)))
5733 ? 1 : align_jumps_log);
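/* Worked example for the size test above (illustrative only; the concrete
   CACHE_LOG value is an assumption, not taken from this file): with
   CACHE_LOG == 5, i.e. a 32-byte cache line, 1 << (CACHE_LOG - 2) is 8
   bytes.  An ADDR_DIFF_VEC of four HImode entries occupies 4 * 2 = 8 bytes,
   so it keeps the minimal 2-byte alignment (return value 1), while a larger
   table is followed by align_jumps_log alignment instead.  */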
5736 rtx_insn *next = next_active_insn (barrier_or_label);
5738 if (! next)
5739 return 0;
5741 rtx pat = PATTERN (next);
5743 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5744 /* This is a barrier in front of a constant table. */
5745 return 0;
5747 if (optimize_size)
5748 return 0;
5750 if (! TARGET_SH2 || ! optimize)
5751 return align_jumps_log;
5753 /* When fixing up pcloads, a constant table might be inserted just before
5754 the basic block that ends with the barrier. Thus, we can't trust the
5755 instruction lengths before that. */
5756 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5758 /* Check if there is an immediately preceding branch to the insn beyond
5759 the barrier. We must weigh the cost of discarding useful information
5760 from the current cache line when executing this branch and there is
5761 an alignment, against that of fetching unneeded insns in front of the
5762 branch target when there is no alignment. */
5764 /* There are two delay_slot cases to consider. One is the simple case
5765 where the preceding branch is to the insn beyond the barrier (simple
5766 delay slot filling), and the other is where the preceding branch has
5767 a delay slot that is a duplicate of the insn after the barrier
5768 (fill_eager_delay_slots) and the branch is to the insn after the insn
5769 after the barrier. */
5771 int slot, credit;
5772 bool jump_to_next = false;
5774 /* Skip to the insn before the JUMP_INSN before the barrier under
5775 investigation. */
5776 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5778 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5779 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5780 prev = prev_real_insn (prev))
5782 jump_to_next = false;
5783 if (GET_CODE (PATTERN (prev)) == USE
5784 || GET_CODE (PATTERN (prev)) == CLOBBER)
5785 continue;
5786 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5788 prev = prev_seq->insn (1);
5789 if (INSN_UID (prev) == INSN_UID (next))
5791 /* Delay slot was filled with insn at jump target. */
5792 jump_to_next = true;
5793 continue;
5797 if (slot &&
5798 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5799 slot = 0;
5800 credit -= get_attr_length (prev);
5802 if (prev && jump_to_label_p (prev))
5804 rtx_insn *x;
5805 if (jump_to_next
5806 || next_real_insn (JUMP_LABEL (prev)) == next
5807 /* If relax_delay_slots() decides NEXT was redundant
5808 with some previous instruction, it will have
5809 redirected PREV's jump to the following insn. */
5810 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5811 /* There is no upper bound on redundant instructions
5812 that might have been skipped, but we must not put an
5813 alignment where none had been before. */
5814 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5815 (INSN_P (x)
5816 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5817 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5818 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5820 rtx pat = PATTERN (prev);
5821 if (GET_CODE (pat) == PARALLEL)
5822 pat = XVECEXP (pat, 0, 0);
5823 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5824 return 0;
5829 return align_jumps_log;
5832 /* If we are inside a phony loop, almost any kind of label can turn up as the
5833 first one in the loop. Aligning a braf label causes incorrect switch
5834 destination addresses; we can detect braf labels because they are
5835 followed by a BARRIER.
5836 Applying loop alignment to small constant or switch tables is a waste
5837 of space, so we suppress this too. */
5838 int
5839 sh_loop_align (rtx_insn *label)
5841 rtx_insn *next = label;
5843 if (! optimize || optimize_size)
5844 return 0;
5847 next = next_nonnote_insn (next);
5848 while (next && LABEL_P (next));
5850 if (! next
5851 || ! INSN_P (next)
5852 || recog_memoized (next) == CODE_FOR_consttable_2)
5853 return 0;
5855 return align_loops_log;
5858 /* Do a final pass over the function, just before delayed branch
5859 scheduling. */
5860 static void
5861 sh_reorg (void)
5863 rtx_insn *first, *insn, *mova = NULL;
5864 int num_mova;
5865 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5866 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5868 first = get_insns ();
5869 max_labelno_before_reorg = max_label_num ();
5871 /* We must split call insns before introducing `mova's. If we're
5872 optimizing, they'll have already been split. Otherwise, make
5873 sure we don't split them too late. */
5874 if (! optimize)
5875 split_all_insns_noflow ();
5877 /* If relaxing, generate pseudo-ops to associate function calls with
5878 the symbols they call. It does no harm to not generate these
5879 pseudo-ops. However, when we can generate them, it enables the
5880 linker to potentially relax the jsr to a bsr, and eliminate the
5881 register load and, possibly, the constant pool entry. */
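/* Schematically (an illustrative sketch; the exact mnemonics and label
   names are assumptions, not compiler output), the labels and notes set up
   below let final_prescan_insn emit something like

     .L4:
	mov.l	.L5,r1		! load the call target from the constant pool
	...
	.uses	.L4		! tell the linker which load feeds this call
	jsr	@r1
	nop

   so that a relaxing linker may turn the jsr into a bsr to the symbol and
   delete the register load and, when it becomes unused, the constant pool
   entry.  */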
5883 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5884 if (TARGET_RELAX)
5886 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5887 own purposes. This works because none of the remaining passes
5888 need to look at them.
5890 ??? But it may break in the future. We should use a machine
5891 dependent REG_NOTE, or some other approach entirely. */
5892 for (insn = first; insn; insn = NEXT_INSN (insn))
5894 if (INSN_P (insn))
5896 rtx note;
5898 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5899 NULL_RTX)) != 0)
5900 remove_note (insn, note);
5904 for (insn = first; insn; insn = NEXT_INSN (insn))
5906 rtx pattern, reg, set, dies;
5907 rtx_code_label *label;
5908 rtx_insn *link, *scan;
5909 int rescan = 0, foundinsn = 0;
5911 if (CALL_P (insn))
5913 pattern = PATTERN (insn);
5915 if (GET_CODE (pattern) == PARALLEL)
5916 pattern = XVECEXP (pattern, 0, 0);
5917 if (GET_CODE (pattern) == SET)
5918 pattern = SET_SRC (pattern);
5920 if (GET_CODE (pattern) != CALL
5921 || !MEM_P (XEXP (pattern, 0)))
5922 continue;
5924 reg = XEXP (XEXP (pattern, 0), 0);
5926 else
5928 reg = sfunc_uses_reg (insn);
5929 if (! reg)
5930 continue;
5933 if (!REG_P (reg))
5934 continue;
5936 /* Try scanning backward to find where the register is set. */
5937 link = NULL;
5938 for (scan = PREV_INSN (insn);
5939 scan && !LABEL_P (scan);
5940 scan = PREV_INSN (scan))
5942 if (! INSN_P (scan))
5943 continue;
5945 if (! reg_mentioned_p (reg, scan))
5946 continue;
5948 if (noncall_uses_reg (reg, scan, &set))
5949 break;
5951 if (set)
5953 link = scan;
5954 break;
5958 if (! link)
5959 continue;
5961 /* The register is set at LINK. */
5963 /* We can only optimize the function call if the register is
5964 being set to a symbol. In theory, we could sometimes
5965 optimize calls to a constant location, but the assembler
5966 and linker do not support that at present. */
5967 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5968 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5969 continue;
5971 /* Scan forward from LINK to the place where REG dies, and
5972 make sure that the only insns which use REG are
5973 themselves function calls. */
5975 /* ??? This doesn't work for call targets that were allocated
5976 by reload, since there may not be a REG_DEAD note for the
5977 register. */
5979 dies = NULL_RTX;
5980 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5982 rtx scanset;
5984 /* Don't try to trace forward past a CODE_LABEL if we haven't
5985 seen INSN yet. Ordinarily, we will only find the setting insn
5986 if it is in the same basic block. However,
5987 cross-jumping can insert code labels in between the load and
5988 the call, and can result in situations where a single call
5989 insn may have two targets depending on where we came from. */
5991 if (LABEL_P (scan) && ! foundinsn)
5992 break;
5994 if (! INSN_P (scan))
5995 continue;
5997 /* Don't try to trace forward past a JUMP. To optimize
5998 safely, we would have to check that all the
5999 instructions at the jump destination did not use REG. */
6001 if (JUMP_P (scan))
6002 break;
6004 if (! reg_mentioned_p (reg, scan))
6005 continue;
6007 if (noncall_uses_reg (reg, scan, &scanset))
6008 break;
6010 if (scan == insn)
6011 foundinsn = 1;
6013 if (scan != insn
6014 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6016 /* There is a function call to this register other
6017 than the one we are checking. If we optimize
6018 this call, we need to rescan again below. */
6019 rescan = 1;
6022 /* ??? We shouldn't have to worry about SCANSET here.
6023 We should just be able to check for a REG_DEAD note
6024 on a function call. However, the REG_DEAD notes are
6025 apparently not dependable around libcalls; c-torture
6026 execute/920501-2 is a test case. If SCANSET is set,
6027 then this insn sets the register, so it must have
6028 died earlier. Unfortunately, this will only handle
6029 the cases in which the register is, in fact, set in a
6030 later insn. */
6032 /* ??? We shouldn't have to use FOUNDINSN here.
6033 This dates back to when we used LOG_LINKS to find
6034 the most recent insn which sets the register. */
6036 if (foundinsn
6037 && (scanset
6038 || find_reg_note (scan, REG_DEAD, reg)))
6040 dies = scan;
6041 break;
6045 if (! dies)
6047 /* Either there was a branch, or some insn used REG
6048 other than as a function call address. */
6049 continue;
6052 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6053 on the insn which sets the register, and on each call insn
6054 which uses the register. In final_prescan_insn we look for
6055 the REG_LABEL_OPERAND notes, and output the appropriate label
6056 or pseudo-op. */
6058 label = gen_label_rtx ();
6059 add_reg_note (link, REG_LABEL_OPERAND, label);
6060 add_reg_note (insn, REG_LABEL_OPERAND, label);
6061 if (rescan)
6063 scan = link;
6066 rtx reg2;
6068 scan = NEXT_INSN (scan);
6069 if (scan != insn
6070 && ((CALL_P (scan)
6071 && reg_mentioned_p (reg, scan))
6072 || ((reg2 = sfunc_uses_reg (scan))
6073 && REGNO (reg2) == REGNO (reg))))
6074 add_reg_note (scan, REG_LABEL_OPERAND, label);
6076 while (scan != dies);
6081 if (TARGET_SH2)
6082 fixup_addr_diff_vecs (first);
6084 if (optimize)
6086 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6087 shorten_branches (first);
6090 /* Scan the function looking for move instructions which have to be
6091 changed to pc-relative loads and insert the literal tables. */
6092 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6093 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6095 if (mova_p (insn))
6097 /* ??? basic block reordering can move a switch table dispatch
6098 below the switch table. Check if that has happened.
6099 We only have the addresses available when optimizing; but then,
6100 this check shouldn't be needed when not optimizing. */
6101 if (!untangle_mova (&num_mova, &mova, insn))
6103 insn = mova;
6104 num_mova = 0;
6107 else if (JUMP_TABLE_DATA_P (insn)
6108 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6109 && num_mova
6110 /* ??? loop invariant motion can also move a mova out of a
6111 loop. Since loop does this code motion anyway, maybe we
6112 should wrap UNSPEC_MOVA into a CONST, so that reload can
6113 move it back. */
6114 && ((num_mova > 1
6115 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6116 || (prev_nonnote_insn (insn)
6117 == XEXP (MOVA_LABELREF (mova), 0))))
6119 rtx_insn *scan;
6120 int total;
6122 num_mova--;
6124 /* Some code might have been inserted between the mova and
6125 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6126 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6127 total += get_attr_length (scan);
6129 /* The range of mova is 1020; add 4 because pc counts from the address
6130 of the second instruction after this one, and subtract 2 in case pc
6131 is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6132 cancels out with alignment effects of the mova itself. */
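/* Worked sum (illustrative restatement of the comment above):
   1020 bytes of mova reach + 4 for the pc bias - 2 of alignment slack
   gives 1022, which is exactly the bound tested below.  */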
6133 if (total > 1022)
6135 /* Change the mova into a load, and restart scanning
6136 there. broken_move will then return true for mova. */
6137 fixup_mova (mova);
6138 insn = mova;
6141 if (broken_move (insn)
6142 || (NONJUMP_INSN_P (insn)
6143 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6145 rtx_insn *scan;
6146 /* Scan ahead looking for a barrier to stick the constant table
6147 behind. */
6148 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6149 rtx_insn *last_float_move = NULL;
6150 rtx last_float = 0, *last_float_addr = NULL;
6151 int need_aligned_label = 0;
6153 if (num_mova && ! mova_p (mova))
6155 /* find_barrier had to change the first mova into a
6156 pcload; thus, we have to start with this new pcload. */
6157 insn = mova;
6158 num_mova = 0;
6160 /* Now find all the moves between the points and modify them. */
6161 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6163 if (LABEL_P (scan))
6164 last_float = 0;
6165 if (NONJUMP_INSN_P (scan)
6166 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6167 need_aligned_label = 1;
6168 if (broken_move (scan))
6170 rtx *patp = &PATTERN (scan), pat = *patp;
6171 rtx src, dst;
6172 rtx lab;
6173 rtx newsrc;
6174 machine_mode mode;
6176 if (GET_CODE (pat) == PARALLEL)
6177 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6178 src = SET_SRC (pat);
6179 dst = SET_DEST (pat);
6180 mode = GET_MODE (dst);
6182 if (mode == SImode && satisfies_constraint_I16 (src)
6183 && REGNO (dst) != FPUL_REG)
6185 int offset = 0;
6187 mode = HImode;
6188 while (GET_CODE (dst) == SUBREG)
6190 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6191 GET_MODE (SUBREG_REG (dst)),
6192 SUBREG_BYTE (dst),
6193 GET_MODE (dst));
6194 dst = SUBREG_REG (dst);
6196 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6198 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6200 /* This must be an insn that clobbers r0. */
6201 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6202 XVECLEN (PATTERN (scan), 0)
6203 - 1);
6204 rtx clobber = *clobberp;
6206 gcc_assert (GET_CODE (clobber) == CLOBBER
6207 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6209 if (last_float
6210 && reg_set_between_p (r0_rtx, last_float_move, scan))
6211 last_float = 0;
6212 lab = add_constant (src, mode, last_float);
6213 if (lab)
6214 emit_insn_before (gen_mova (lab), scan);
6215 else
6217 /* There will be a REG_UNUSED note for r0 on
6218 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6219 otherwise reorg:mark_target_live_regs would not
6220 consider r0 to be used, and we would end up with a delay
6221 slot insn in front of SCAN that clobbers r0. */
6222 rtx note
6223 = find_regno_note (last_float_move, REG_UNUSED, 0);
6225 /* If we are not optimizing, then there may not be
6226 a note. */
6227 if (note)
6228 PUT_REG_NOTE_KIND (note, REG_INC);
6230 *last_float_addr = r0_inc_rtx;
6232 last_float_move = scan;
6233 last_float = src;
6234 newsrc = gen_const_mem (mode,
6235 (((TARGET_SH4 && ! TARGET_FMOVD)
6236 || REGNO (dst) == FPUL_REG)
6237 ? r0_inc_rtx
6238 : r0_rtx));
6239 last_float_addr = &XEXP (newsrc, 0);
6241 /* Remove the clobber of r0. */
6242 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6243 gen_rtx_SCRATCH (Pmode));
6245 /* This is a mova needing a label. Create it. */
6246 else if (GET_CODE (src) == UNSPEC
6247 && XINT (src, 1) == UNSPEC_MOVA
6248 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6250 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6251 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6252 newsrc = gen_rtx_UNSPEC (SImode,
6253 gen_rtvec (1, newsrc),
6254 UNSPEC_MOVA);
6256 else if (GET_CODE (src) == UNSPEC_VOLATILE
6257 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6259 newsrc = XVECEXP (src, 0, 0);
6260 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6261 INSN_CODE (scan) = -1;
6262 continue;
6264 else
6266 lab = add_constant (src, mode, 0);
6267 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6268 newsrc = gen_const_mem (mode, newsrc);
6270 *patp = gen_rtx_SET (dst, newsrc);
6271 INSN_CODE (scan) = -1;
6274 dump_table (need_aligned_label ? insn : 0, barrier);
6275 insn = barrier;
6278 label_ref_list_d_pool.release ();
6279 for (insn = first; insn; insn = NEXT_INSN (insn))
6280 PUT_MODE (insn, VOIDmode);
6282 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6283 INSN_ADDRESSES_FREE ();
6284 split_branches (first);
6286 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6287 also has an effect on the register that holds the address of the sfunc.
6288 Insert an extra dummy insn in front of each sfunc that pretends to
6289 use this register. */
6290 if (flag_delayed_branch)
6292 for (insn = first; insn; insn = NEXT_INSN (insn))
6294 rtx reg = sfunc_uses_reg (insn);
6296 if (! reg)
6297 continue;
6298 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6301 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6304 /* Return the UID of the insn that follows the specified label. */
6305 static int
6306 get_dest_uid (rtx label, int max_uid)
6308 rtx_insn *dest = next_real_insn (label);
6310 if (! dest)
6311 /* This can happen for an undefined label. */
6312 return 0;
6313 int dest_uid = INSN_UID (dest);
6314 /* If this is a newly created branch redirection blocking instruction,
6315 we cannot index the branch_uid or insn_addresses arrays with its
6316 uid. But then, we won't need to, because the actual destination is
6317 the following branch. */
6318 while (dest_uid >= max_uid)
6320 dest = NEXT_INSN (dest);
6321 dest_uid = INSN_UID (dest);
6323 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6324 return 0;
6325 return dest_uid;
6328 /* Split condbranches that are out of range. Also add clobbers for
6329 scratch registers that are needed in far jumps.
6330 We do this before delay slot scheduling, so that it can take our
6331 newly created instructions into account. It also allows us to
6332 find branches with common targets more easily. */
6333 static void
6334 split_branches (rtx_insn *first)
6336 rtx_insn *insn;
6337 struct far_branch **uid_branch, *far_branch_list = 0;
6338 int max_uid = get_max_uid ();
6339 int ok;
6341 /* Find out which branches are out of range. */
6342 shorten_branches (first);
6344 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6345 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6347 for (insn = first; insn; insn = NEXT_INSN (insn))
6348 if (! INSN_P (insn))
6349 continue;
6350 else if (insn->deleted ())
6352 /* Shorten_branches would split this instruction again,
6353 so transform it into a note. */
6354 SET_INSN_DELETED (insn);
6356 else if (JUMP_P (insn))
6358 enum attr_type type = get_attr_type (insn);
6359 if (type == TYPE_CBRANCH)
6361 rtx_insn *next, *beyond;
6363 if (get_attr_length (insn) > 4)
6365 rtx src = SET_SRC (PATTERN (insn));
6366 rtx olabel = XEXP (XEXP (src, 1), 0);
6367 int addr = INSN_ADDRESSES (INSN_UID (insn));
6368 rtx_insn *label = 0;
6369 int dest_uid = get_dest_uid (olabel, max_uid);
6370 struct far_branch *bp = uid_branch[dest_uid];
6372 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6373 the label if the LABEL_NUSES count drops to zero. There is
6374 always a jump_optimize pass that sets these values, but it
6375 proceeds to delete unreferenced code, and then if not
6376 optimizing, to un-delete the deleted instructions, thus
6377 leaving labels with too low uses counts. */
6378 if (! optimize)
6380 JUMP_LABEL (insn) = olabel;
6381 LABEL_NUSES (olabel)++;
6383 if (! bp)
6385 bp = (struct far_branch *) alloca (sizeof *bp);
6386 uid_branch[dest_uid] = bp;
6387 bp->prev = far_branch_list;
6388 far_branch_list = bp;
6389 bp->far_label = as_a <rtx_insn *> (
6390 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6391 0));
6392 LABEL_NUSES (bp->far_label)++;
6394 else
6396 label = bp->near_label;
6397 if (! label && bp->address - addr >= CONDJUMP_MIN)
6399 rtx_insn *block = bp->insert_place;
6401 if (GET_CODE (PATTERN (block)) == RETURN)
6402 block = PREV_INSN (block);
6403 else
6404 block = gen_block_redirect (block,
6405 bp->address, 2);
6406 label = emit_label_after (gen_label_rtx (),
6407 PREV_INSN (block));
6408 bp->near_label = label;
6410 else if (label && ! NEXT_INSN (label))
6412 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6413 bp->insert_place = insn;
6414 else
6415 gen_far_branch (bp);
6418 if (! label
6419 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6421 bp->near_label = label = gen_label_rtx ();
6422 bp->insert_place = insn;
6423 bp->address = addr;
6425 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6426 gcc_assert (ok);
6428 else
6430 /* get_attr_length (insn) == 2 */
6431 /* Check if we have a pattern where reorg wants to redirect
6432 the branch to a label from an unconditional branch that
6433 is too far away. */
6434 /* We can't use JUMP_LABEL here because it might be undefined
6435 when not optimizing. */
6436 /* A syntax error might cause beyond to be NULL_RTX. */
6437 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6438 beyond = next_active_insn (as_a<rtx_insn *> (temp));
6440 if (beyond
6441 && (JUMP_P (beyond)
6442 || ((beyond = next_active_insn (beyond))
6443 && JUMP_P (beyond)))
6444 && GET_CODE (PATTERN (beyond)) == SET
6445 && recog_memoized (beyond) == CODE_FOR_jump_compact
6446 && ((INSN_ADDRESSES
6447 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6448 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6449 > 252 + 258 + 2))
6450 gen_block_redirect (beyond,
6451 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6454 next = next_active_insn (insn);
6456 if (next
6457 && (JUMP_P (next)
6458 || ((next = next_active_insn (next))
6459 && JUMP_P (next)))
6460 && GET_CODE (PATTERN (next)) == SET
6461 && recog_memoized (next) == CODE_FOR_jump_compact
6462 && ((INSN_ADDRESSES
6463 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6464 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6465 > 252 + 258 + 2))
6466 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6468 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6470 int addr = INSN_ADDRESSES (INSN_UID (insn));
6471 rtx_insn *far_label = 0;
6472 int dest_uid = 0;
6473 struct far_branch *bp;
6475 if (type == TYPE_JUMP)
6477 if (CROSSING_JUMP_P (insn))
6479 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6480 insn);
6481 continue;
6484 far_label = as_a <rtx_insn *> (
6485 XEXP (SET_SRC (PATTERN (insn)), 0));
6486 dest_uid = get_dest_uid (far_label, max_uid);
6487 if (! dest_uid)
6489 /* Parse errors can lead to labels outside
6490 the insn stream. */
6491 if (! NEXT_INSN (far_label))
6492 continue;
6494 if (! optimize)
6496 JUMP_LABEL (insn) = far_label;
6497 LABEL_NUSES (far_label)++;
6499 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6500 far_label = 0;
6503 bp = uid_branch[dest_uid];
6504 if (! bp)
6506 bp = (struct far_branch *) alloca (sizeof *bp);
6507 uid_branch[dest_uid] = bp;
6508 bp->prev = far_branch_list;
6509 far_branch_list = bp;
6510 bp->near_label = 0;
6511 bp->far_label = far_label;
6512 if (far_label)
6513 LABEL_NUSES (far_label)++;
6515 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6516 if (addr - bp->address <= CONDJUMP_MAX)
6517 emit_label_after (bp->near_label, PREV_INSN (insn));
6518 else
6520 gen_far_branch (bp);
6521 bp->near_label = 0;
6523 else
6524 bp->near_label = 0;
6525 bp->address = addr;
6526 bp->insert_place = insn;
6527 if (! far_label)
6528 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6529 else
6530 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6533 /* Generate all pending far branches,
6534 and free our references to the far labels. */
6535 while (far_branch_list)
6537 if (far_branch_list->near_label
6538 && ! NEXT_INSN (far_branch_list->near_label))
6539 gen_far_branch (far_branch_list);
6540 if (optimize
6541 && far_branch_list->far_label
6542 && ! --LABEL_NUSES (far_branch_list->far_label))
6543 delete_insn (far_branch_list->far_label);
6544 far_branch_list = far_branch_list->prev;
6547 /* Instruction length information is no longer valid due to the new
6548 instructions that have been generated. */
6549 init_insn_lengths ();
6552 /* Dump out instruction addresses, which is useful for debugging the
6553 constant pool table stuff.
6555 If relaxing, output the label and pseudo-ops used to link together
6556 calls and the instruction which set the registers.
6558 ??? The addresses printed by this routine for insns are nonsense for
6559 insns which are inside of a sequence where none of the inner insns have
6560 variable length. This is because the second pass of shorten_branches
6561 does not bother to update them. */
6562 void
6563 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6564 int noperands ATTRIBUTE_UNUSED)
6566 if (TARGET_DUMPISIZE)
6567 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6569 if (TARGET_RELAX)
6571 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6573 rtx pattern = PATTERN (insn);
6574 if (GET_CODE (pattern) == PARALLEL)
6575 pattern = XVECEXP (pattern, 0, 0);
6576 switch (GET_CODE (pattern))
6578 case SET:
6579 if (GET_CODE (SET_SRC (pattern)) != CALL
6580 && get_attr_type (insn) != TYPE_SFUNC)
6582 targetm.asm_out.internal_label
6583 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6584 break;
6586 /* FALLTHROUGH */
6587 case CALL:
6588 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6589 CODE_LABEL_NUMBER (XEXP (note, 0)));
6590 break;
6592 default:
6593 gcc_unreachable ();
6599 /* Dump out any constants accumulated in the final pass. These will
6600 only be labels. */
6601 const char *
6602 output_jump_label_table (void)
6604 if (pool_size)
6606 fprintf (asm_out_file, "\t.align 2\n");
6607 for (int i = 0; i < pool_size; i++)
6609 pool_node *p = &pool_vector[i];
6611 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6612 CODE_LABEL_NUMBER (p->label));
6613 output_asm_insn (".long %O0", &p->value);
6615 pool_size = 0;
6618 return "";
6621 /* A full frame looks like:
6623 arg-5
6624 arg-4
6625 [ if current_function_anonymous_args
6626 arg-3
6627 arg-2
6628 arg-1
6629 arg-0 ]
6630 saved-fp
6631 saved-r10
6632 saved-r11
6633 saved-r12
6634 saved-pr
6635 local-n
6637 local-1
6638 local-0 <- fp points here.
6640 Number of bytes pushed for anonymous args, used to pass information
6641 between expand_prologue and expand_epilogue.
6643 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6644 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6645 for an epilogue and a negative value means that it's for a sibcall
6646 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6647 all the registers that are about to be restored, and hence dead. */
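/* Usage recap (for illustration; both calls appear later in this file):
   sh_expand_prologue adjusts downwards with
     output_stack_adjust (-rounded_frame_size (d), stack_pointer_rtx, 0,
			  NULL, true);
   while sh_expand_epilogue passes e = sibcall_p ? -1 : 1 together with the
   live register set, e.g.
     output_stack_adjust (frame_size, stack_pointer_rtx, e,
			  &live_regs_mask, true);
   so the sign of EPILOGUE_P distinguishes prologue, epilogue and sibcall
   epilogue as described above.  */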
6648 static void
6649 output_stack_adjust (int size, rtx reg, int epilogue_p,
6650 HARD_REG_SET *live_regs_mask, bool frame_p)
6652 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6653 if (size)
6655 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6657 /* This test is bogus, as output_stack_adjust is used to re-align the
6658 stack. */
6659 #if 0
6660 gcc_assert (!(size % align));
6661 #endif
6663 if (CONST_OK_FOR_ADD (size))
6664 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6665 /* Try to do it with two partial adjustments; however, we must make
6666 sure that the stack is properly aligned at all times, in case
6667 an interrupt occurs between the two partial adjustments. */
6668 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6669 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6671 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6672 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
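/* Worked example (illustrative numbers; it assumes CONST_OK_FOR_ADD accepts
   the usual 8-bit signed add immediates and that align == 4): for
   size == 200, a single add of 200 would not fit, but
     first  = 200 / 2 & -4;    that is 100, a multiple of the alignment
     second = 200 - first;     that is 100
   gives two adds of 100 each.  Because the first step is rounded down to a
   multiple of the alignment, the stack remains properly aligned even if an
   interrupt arrives between the two adjustments.  */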
6674 else
6676 rtx const_reg;
6677 rtx insn;
6678 int temp = epilogue_p ? 7 : 1;
6679 int i;
6681 /* If TEMP is invalid, we could temporarily save a general
6682 register to MACL. However, there is currently no need
6683 to handle this case, so just die when we see it. */
6684 if (epilogue_p < 0
6685 || current_function_interrupt
6686 || ! call_really_used_regs[temp] || fixed_regs[temp])
6687 temp = -1;
6688 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6690 HARD_REG_SET temps;
6691 COPY_HARD_REG_SET (temps, call_used_reg_set);
6692 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6693 if (epilogue_p > 0)
6695 int nreg = 0;
6696 if (crtl->return_rtx)
6698 machine_mode mode;
6699 mode = GET_MODE (crtl->return_rtx);
6700 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6701 nreg = hard_regno_nregs (FIRST_RET_REG, mode);
6703 for (i = 0; i < nreg; i++)
6704 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6705 if (crtl->calls_eh_return)
6707 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6708 for (i = 0; i <= 3; i++)
6709 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6712 if (epilogue_p <= 0)
6714 for (i = FIRST_PARM_REG;
6715 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6716 CLEAR_HARD_REG_BIT (temps, i);
6717 if (cfun->static_chain_decl != NULL)
6718 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6720 temp = scavenge_reg (&temps);
6722 if (temp < 0 && live_regs_mask)
6724 HARD_REG_SET temps;
6726 COPY_HARD_REG_SET (temps, *live_regs_mask);
6727 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6728 temp = scavenge_reg (&temps);
6730 if (temp < 0)
6732 rtx adj_reg, tmp_reg, mem;
6734 /* If we reached here, the most likely case is the (sibcall)
6735 epilogue. Emit a special push/pop sequence for such a case as
6736 the last resort. This looks lengthy, but it is not a problem
6737 because it seems to be very rare. */
6738 gcc_assert (epilogue_p);
6740 /* ??? There is still the slight possibility that r4 or
6741 r5 have been reserved as fixed registers or assigned
6742 as global registers, and they change during an
6743 interrupt. There are possible ways to handle this:
6745 - If we are adjusting the frame pointer (r14), we can do
6746 with a single temp register and an ordinary push / pop
6747 on the stack.
6748 - Grab any call-used or call-saved registers (i.e. not
6749 fixed or globals) for the temps we need. We might
6750 also grab r14 if we are adjusting the stack pointer.
6751 If we can't find enough available registers, issue
6752 a diagnostic and die - the user must have reserved
6753 way too many registers.
6754 But since all this is rather unlikely to happen and
6755 would require extra testing, we just die if r4 / r5
6756 are not available. */
6757 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6758 && !global_regs[4] && !global_regs[5]);
6760 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6761 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6762 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6763 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6764 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6765 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6766 emit_move_insn (mem, tmp_reg);
6767 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6768 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6769 emit_move_insn (mem, tmp_reg);
6770 emit_move_insn (reg, adj_reg);
6771 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6772 emit_move_insn (adj_reg, mem);
6773 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6774 emit_move_insn (tmp_reg, mem);
6775 /* Tell flow the insns that pop r4/r5 aren't dead. */
6776 emit_use (tmp_reg);
6777 emit_use (adj_reg);
6778 return;
6780 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6782 /* If SIZE is negative, subtract the positive value.
6783 This sometimes allows a constant pool entry to be shared
6784 between prologue and epilogue code. */
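/* For instance (illustrative numbers): a prologue adjustment of -256 loads
   the constant 256 into the scratch register and subtracts it, while the
   matching epilogue adjustment of +256 loads the same 256 and adds it, so
   both loads can refer to a single constant pool entry.  */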
6785 if (size < 0)
6787 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6788 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6790 else
6792 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6793 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6795 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6796 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6797 GEN_INT (size))));
6802 /* Emit the specified insn and mark it as frame related. */
6803 static rtx_insn *
6804 emit_frame_insn (rtx x)
6806 rtx_insn *insn = emit_insn (x);
6807 RTX_FRAME_RELATED_P (insn) = 1;
6808 return insn;
6811 /* Output RTL to push register RN onto the stack. */
6812 static rtx
6813 push (int rn)
6815 rtx x;
6816 if (rn == FPUL_REG)
6817 x = gen_push_fpul ();
6818 else if (rn == FPSCR_REG)
6819 x = gen_push_fpscr ();
6820 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6821 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6823 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6824 return NULL_RTX;
6825 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6827 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6828 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6829 else
6830 x = gen_push (gen_rtx_REG (SImode, rn));
6832 x = emit_frame_insn (x);
6833 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6834 return x;
6837 /* Output RTL to pop register RN from the stack. */
6838 static void
6839 pop (int rn)
6841 rtx x, sp_reg, reg;
6842 if (rn == FPUL_REG)
6843 x = gen_pop_fpul ();
6844 else if (rn == FPSCR_REG)
6845 x = gen_pop_fpscr ();
6846 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6847 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6849 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6850 return;
6851 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6853 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6854 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6855 else
6856 x = gen_pop (gen_rtx_REG (SImode, rn));
6858 x = emit_insn (x);
6860 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6861 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6862 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6863 : SET_DEST (PATTERN (x)));
6864 add_reg_note (x, REG_CFA_RESTORE, reg);
6865 add_reg_note (x, REG_CFA_ADJUST_CFA,
6866 gen_rtx_SET (sp_reg,
6867 plus_constant (SImode, sp_reg,
6868 GET_MODE_SIZE (GET_MODE (reg)))));
6869 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6870 RTX_FRAME_RELATED_P (x) = 1;
6873 /* Generate code to push the regs specified in the mask. */
6874 static void
6875 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6877 bool skip_fpscr = false;
6879 /* Push PR last; this gives better latencies after the prologue, and
6880 candidates for the return delay slot when there are no general
6881 registers pushed. */
6882 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6883 i < FIRST_PSEUDO_REGISTER; i++)
6885 /* If this is an interrupt handler, and the SZ bit varies,
6886 and we have to push any floating point register, we need
6887 to switch to the correct precision first. */
6888 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6889 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6891 HARD_REG_SET unsaved;
6893 push (FPSCR_REG);
6894 COMPL_HARD_REG_SET (unsaved, *mask);
6895 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6896 skip_fpscr = true;
6898 if (i != PR_REG
6899 && (i != FPSCR_REG || ! skip_fpscr)
6900 && TEST_HARD_REG_BIT (*mask, i))
6902 /* If the ISR has RESBANK attribute assigned, don't push any of
6903 the following registers - R0-R14, MACH, MACL and GBR. */
6904 if (! (sh_cfun_resbank_handler_p ()
6905 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6906 || i == MACH_REG
6907 || i == MACL_REG
6908 || i == GBR_REG)))
6909 push (i);
6913 /* Push banked registers last to improve delay slot opportunities. */
6914 if (interrupt_handler)
6916 bool use_movml = false;
6918 if (TARGET_SH2A)
6920 unsigned int count = 0;
6922 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6923 if (TEST_HARD_REG_BIT (*mask, i))
6924 count++;
6925 else
6926 break;
6928 /* Use movml when all banked registers are pushed. */
6929 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6930 use_movml = true;
6933 if (sh_cfun_resbank_handler_p ())
6934 ; /* Do nothing. */
6935 else if (use_movml)
6937 rtx x, mem, reg, set;
6938 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6940 /* We must avoid scheduling multiple store insns with other
6941 insns. */
6942 emit_insn (gen_blockage ());
6943 x = gen_movml_push_banked (sp_reg);
6944 x = emit_frame_insn (x);
6945 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6947 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6948 reg = gen_rtx_REG (SImode, i);
6949 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6952 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6953 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6954 emit_insn (gen_blockage ());
6956 else
6957 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6958 if (TEST_HARD_REG_BIT (*mask, i))
6959 push (i);
6962 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6963 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6964 push (PR_REG);
6967 /* Work out the registers which need to be saved, both as a mask and a
6968 count of saved bytes. Return the count.
6970 If doing a pragma interrupt function, then push all regs used by the
6971 function, and if we call another function (we can tell by looking at PR),
6972 make sure that all the regs it clobbers are safe too. */
6973 static int
6974 calc_live_regs (HARD_REG_SET *live_regs_mask)
6976 unsigned int reg;
6977 tree attrs;
6978 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6979 bool nosave_low_regs;
6981 attrs = DECL_ATTRIBUTES (current_function_decl);
6982 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6983 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6984 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6985 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6987 CLEAR_HARD_REG_SET (*live_regs_mask);
6988 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
6989 && df_regs_ever_live_p (FPSCR_REG))
6990 target_flags &= ~MASK_FPU_SINGLE;
6991 /* If we can save a lot of saves by switching to double mode, do that. */
6992 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
6993 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6994 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6995 && (! call_really_used_regs[reg]
6996 || interrupt_handler)
6997 && ++count > 2)
6999 target_flags &= ~MASK_FPU_SINGLE;
7000 break;
7004 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7005 bool pr_live = (pr_initial
7006 ? (!REG_P (pr_initial)
7007 || REGNO (pr_initial) != (PR_REG))
7008 : df_regs_ever_live_p (PR_REG));
7009 /* For Shcompact, if not optimizing, we end up with a memory reference
7010 using the return address pointer for __builtin_return_address even
7011 though there is no actual need to put the PR register on the stack. */
7012 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7014 /* Force PR to be live if the prologue has to call the SHmedia
7015 argument decoder or register saver. */
7016 bool has_call = pr_live;
7018 int count;
7019 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7021 if (reg == PR_REG
7022 ? pr_live
7023 : interrupt_handler
7024 ? (/* Need to save all the regs ever live. */
7025 (df_regs_ever_live_p (reg)
7026 || (call_really_used_regs[reg]
7027 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7028 || reg == PIC_OFFSET_TABLE_REGNUM)
7029 && has_call))
7030 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7031 && reg != RETURN_ADDRESS_POINTER_REGNUM
7032 && reg != T_REG && reg != GBR_REG
7033 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7034 /* Push fpscr only on targets which have an FPU. */
7035 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7036 : (/* Only push those regs which are used and need to be saved. */
7037 (false)
7038 || (df_regs_ever_live_p (reg)
7039 && ((!call_really_used_regs[reg]
7040 && !(reg != PIC_OFFSET_TABLE_REGNUM
7041 && fixed_regs[reg] && call_used_regs[reg]))
7042 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7043 || (crtl->calls_eh_return
7044 && (reg == EH_RETURN_DATA_REGNO (0)
7045 || reg == EH_RETURN_DATA_REGNO (1)
7046 || reg == EH_RETURN_DATA_REGNO (2)
7047 || reg == EH_RETURN_DATA_REGNO (3)))
7048 || ((reg == MACL_REG || reg == MACH_REG)
7049 && df_regs_ever_live_p (reg)
7050 && sh_cfun_attr_renesas_p ())
7053 SET_HARD_REG_BIT (*live_regs_mask, reg);
7054 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7056 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7057 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7059 if (FP_REGISTER_P (reg))
7061 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7063 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7064 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7067 else if (XD_REGISTER_P (reg))
7069 /* Must switch to double mode to access these registers. */
7070 target_flags &= ~MASK_FPU_SINGLE;
7074 if (nosave_low_regs && reg == R8_REG)
7075 break;
7078 return count;
7081 /* Code to generate prologue and epilogue sequences */
7083 /* PUSHED is the number of bytes that are being pushed on the
7084 stack for register saves. Return the frame size, padded
7085 appropriately so that the stack stays properly aligned. */
7086 static HOST_WIDE_INT
7087 rounded_frame_size (int pushed)
7089 HOST_WIDE_INT size = get_frame_size ();
7090 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7092 if (ACCUMULATE_OUTGOING_ARGS)
7093 size += crtl->outgoing_args_size;
7095 return ((size + pushed + align - 1) & -align) - pushed;
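/* Worked example (illustrative numbers): with get_frame_size () == 10,
   pushed == 12 and a 4-byte stack alignment, the result is
   ((10 + 12 + 3) & -4) - 12 == 12, i.e. the local frame is padded from 10
   to 12 bytes so that saved registers plus frame stay a multiple of the
   alignment.  */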
7098 /* Expand code for the function prologue. */
7099 void
7100 sh_expand_prologue (void)
7102 int save_flags = target_flags;
7103 tree sp_switch_attr
7104 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7106 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7108 /* We have pretend args if we had an object sent partially in registers
7109 and partially on the stack, e.g. a large structure. */
7110 int pretend_args = crtl->args.pretend_args_size;
7111 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7112 && (NPARM_REGS(SImode)
7113 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7114 pretend_args = 0;
7116 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7117 int stack_usage = pretend_args;
7119 /* Emit the code for SETUP_VARARGS. */
7120 if (cfun->stdarg)
7122 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7124 /* Push arg regs as if they'd been provided by the caller on the stack. */
7125 for (int i = 0; i < NPARM_REGS(SImode); i++)
7127 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7129 if (i >= (NPARM_REGS(SImode)
7130 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7132 break;
7133 push (rn);
7134 stack_usage += GET_MODE_SIZE (SImode);
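/* Example of the loop above (illustrative; it assumes the usual SH
   convention of NPARM_REGS (SImode) == 4 integer argument registers
   starting at r4): for a stdarg function with two named integer arguments
   the loop pushes r7 and then r6, the registers that may hold unnamed
   arguments, and stops as soon as i reaches NPARM_REGS (SImode) - arg_count,
   here 2, leaving r4 and r5 untouched.  */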
7139 /* If we're supposed to switch stacks at function entry, do so now. */
7140 if (sp_switch_attr)
7142 rtx lab, newsrc;
7143 /* The argument specifies a variable holding the address of the
7144 stack the interrupt function should switch to/from at entry/exit. */
7145 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7146 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7147 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7149 lab = add_constant (sp_switch, SImode, 0);
7150 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7152 emit_insn (gen_sp_switch_1 (newsrc));
7155 HARD_REG_SET live_regs_mask;
7156 int d = calc_live_regs (&live_regs_mask);
7157 /* ??? Maybe we could save some switching if we can move a mode switch
7158 that already happens to be at the function start into the prologue. */
7159 if (target_flags != save_flags && ! current_function_interrupt)
7160 emit_insn (gen_toggle_sz ());
7162 push_regs (&live_regs_mask, current_function_interrupt);
7163 stack_usage += d;
7165 if (flag_pic && !TARGET_FDPIC
7166 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7167 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7169 if (target_flags != save_flags && ! current_function_interrupt)
7170 emit_insn (gen_toggle_sz ());
7172 target_flags = save_flags;
7174 output_stack_adjust (-rounded_frame_size (d),
7175 stack_pointer_rtx, 0, NULL, true);
7176 stack_usage += rounded_frame_size (d);
7178 if (frame_pointer_needed)
7179 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7181 /* If we are profiling, make sure no instructions are scheduled before
7182 the call to mcount. Similarly, if call instructions are scheduled
7183 before frame-related insns, the unwinder will be confused, because
7184 currently SH has no unwind info for function epilogues. */
7185 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7186 emit_insn (gen_blockage ());
7188 if (flag_stack_usage_info)
7189 current_function_static_stack_size = stack_usage;
7192 /* Expand code for the function epilogue. */
7193 void
7194 sh_expand_epilogue (bool sibcall_p)
7196 int save_flags = target_flags;
7197 bool fpscr_deferred = false;
7198 int e = sibcall_p ? -1 : 1;
7200 HARD_REG_SET live_regs_mask;
7201 int d = calc_live_regs (&live_regs_mask);
7203 int save_size = d;
7204 int frame_size = rounded_frame_size (d);
7206 if (frame_pointer_needed)
7208 /* We must avoid scheduling the epilogue with previous basic blocks.
7209 See PR/18032 and PR/40313. */
7210 emit_insn (gen_blockage ());
7211 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7212 &live_regs_mask, true);
7214 /* We must avoid moving the stack pointer adjustment past code
7215 which reads from the local frame, else an interrupt could
7216 occur after the SP adjustment and clobber data in the local
7217 frame. */
7218 emit_insn (gen_blockage ());
7219 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7221 else if (frame_size)
7223 /* We must avoid moving the stack pointer adjustment past code
7224 which reads from the local frame, else an interrupt could
7225 occur after the SP adjustment and clobber data in the local
7226 frame. */
7227 emit_insn (gen_blockage ());
7228 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7229 &live_regs_mask, true);
7232 /* Pop all the registers. */
7234 if (target_flags != save_flags && ! current_function_interrupt)
7235 emit_insn (gen_toggle_sz ());
7238 int last_reg;
7240 save_size = 0;
7241 /* For an ISR with RESBANK attribute assigned, don't pop PR
7242 register. */
7243 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7244 && !sh_cfun_resbank_handler_p ())
7246 if (!frame_pointer_needed)
7247 emit_insn (gen_blockage ());
7248 pop (PR_REG);
7251 /* Banked registers are popped first to avoid being scheduled in the
7252 delay slot. RTE switches banks before the delay-slot instruction. */
7253 if (current_function_interrupt)
7255 bool use_movml = false;
7257 if (TARGET_SH2A)
7259 unsigned int count = 0;
7261 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7262 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7263 count++;
7264 else
7265 break;
7267 /* Use movml when all banked registers are popped. */
7268 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7269 use_movml = true;
7272 if (sh_cfun_resbank_handler_p ())
7273 ; /* Do nothing. */
7274 else if (use_movml)
7276 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7278 /* We must avoid scheduling multiple load insns with other
7279 insns. */
7280 emit_insn (gen_blockage ());
7281 emit_insn (gen_movml_pop_banked (sp_reg));
7282 emit_insn (gen_blockage ());
7284 else
7285 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7286 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7287 pop (i);
7289 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7291 else
7292 last_reg = FIRST_PSEUDO_REGISTER;
7294 for (int i = 0; i < last_reg; i++)
7296 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7298 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7299 && hard_reg_set_intersect_p (live_regs_mask,
7300 reg_class_contents[DF_REGS]))
7301 fpscr_deferred = true;
7302 /* For an ISR with RESBANK attribute assigned, don't pop
7303 following registers, R0-R14, MACH, MACL and GBR. */
7304 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7305 && ! (sh_cfun_resbank_handler_p ()
7306 && ((j >= FIRST_GENERAL_REG
7307 && j < LAST_GENERAL_REG)
7308 || j == MACH_REG
7309 || j == MACL_REG
7310 || j == GBR_REG)))
7311 pop (j);
7313 if (j == FIRST_FP_REG && fpscr_deferred)
7314 pop (FPSCR_REG);
7317 if (target_flags != save_flags && ! current_function_interrupt)
7318 emit_insn (gen_toggle_sz ());
7319 target_flags = save_flags;
7321 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7322 stack_pointer_rtx, e, NULL, true);
7324 if (crtl->calls_eh_return)
7325 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7326 EH_RETURN_STACKADJ_RTX));
7328 /* Switch back to the normal stack if necessary. */
7329 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7330 emit_insn (gen_sp_switch_2 ());
7332 /* Tell flow the insn that pops PR isn't dead. */
7333 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7334 emit_use (gen_rtx_REG (SImode, PR_REG));
7337 /* Emit code to change the current function's return address to RA.
7338 TEMP is available as a scratch register, if needed. */
7339 void
7340 sh_set_return_address (rtx ra, rtx tmp)
7342 HARD_REG_SET live_regs_mask;
7343 int d = calc_live_regs (&live_regs_mask);
7345 /* If pr_reg isn't live, we can set it directly. */
7346 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7348 rtx rr = gen_rtx_REG (SImode, PR_REG);
7349 emit_insn (GEN_MOV (rr, ra));
7350 /* Tell flow the register for return isn't dead. */
7351 emit_use (rr);
7352 return;
7355 int pr_offset = rounded_frame_size (d);
7357 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7359 if (frame_pointer_needed)
7360 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7361 else
7362 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7364 tmp = gen_frame_mem (Pmode, tmp);
7365 emit_insn (GEN_MOV (tmp, ra));
7366 /* Tell flow this store isn't dead. */
7367 emit_use (tmp);
7370 /* Clear variables at function end. */
7371 static void
7372 sh_output_function_epilogue (FILE *)
7376 static rtx
7377 sh_builtin_saveregs (void)
7379 /* First unnamed integer register. */
7380 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7381 /* Number of integer registers we need to save. */
7382 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7383 /* First unnamed SFmode float reg */
7384 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7385 /* Number of SFmode float regs to save. */
7386 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7387 rtx regbuf, fpregs;
7388 int bufsize, regno;
7389 alias_set_type alias_set;
7391 if (!TARGET_FPU_ANY)
7393 error ("__builtin_saveregs not supported by this subtarget");
7394 return const0_rtx;
7397 /* Allocate block of memory for the regs. */
7398 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7399 Or can assign_stack_local accept a 0 SIZE argument? */
7400 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7402 if (n_floatregs & 1)
7404 rtx addr;
7406 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7407 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7408 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7409 regbuf = change_address (regbuf, BLKmode, addr);
7411 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7413 rtx addr, mask;
7415 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7416 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7417 XEXP (regbuf, 0), 4));
7418 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7419 emit_insn (gen_andsi3 (addr, addr, mask));
7420 regbuf = change_address (regbuf, BLKmode, addr);
7422 else
7423 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7424 alias_set = get_varargs_alias_set ();
7425 set_mem_alias_set (regbuf, alias_set);
7427 /* Save int args.
7428 This is optimized to only save the regs that are necessary. Explicitly
7429 named args need not be saved. */
7430 if (n_intregs > 0)
7431 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7432 adjust_address (regbuf, BLKmode,
7433 n_floatregs * UNITS_PER_WORD),
7434 n_intregs);
7436 /* Save float args.
7437 This is optimized to only save the regs that are necessary. Explicitly
7438 named args need not be saved.
7439 We explicitly build a pointer to the buffer because it halves the insn
7440 count when not optimizing (otherwise the pointer is built for each reg
7441 saved).
7442 We emit the moves in reverse order so that we can use predecrement. */
7444 fpregs = copy_to_mode_reg (Pmode,
7445 plus_constant (Pmode, XEXP (regbuf, 0),
7446 n_floatregs * UNITS_PER_WORD));
7447 if (TARGET_FPU_DOUBLE)
7449 rtx mem;
7450 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7452 emit_insn (gen_addsi3 (fpregs, fpregs,
7453 GEN_INT (-2 * UNITS_PER_WORD)));
7454 mem = change_address (regbuf, DFmode, fpregs);
7455 emit_move_insn (mem,
7456 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7458 regno = first_floatreg;
7459 if (regno & 1)
7461 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7462 mem = change_address (regbuf, SFmode, fpregs);
7463 emit_move_insn (mem,
7464 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7465 + regno - SH_REG_MSW_OFFSET));
7468 else
7469 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7471 rtx mem;
7473 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7474 mem = change_address (regbuf, SFmode, fpregs);
7475 emit_move_insn (mem,
7476 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7479 /* Return the address of the regbuf. */
7480 return XEXP (regbuf, 0);
7483 /* Define the `__builtin_va_list' type for the ABI. */
7484 static tree
7485 sh_build_builtin_va_list (void)
7487 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7488 tree record, type_decl;
7490 if ((! TARGET_SH2E && ! TARGET_SH4)
7491 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7492 return ptr_type_node;
7494 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7495 type_decl = build_decl (BUILTINS_LOCATION,
7496 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7498 f_next_o = build_decl (BUILTINS_LOCATION,
7499 FIELD_DECL, get_identifier ("__va_next_o"),
7500 ptr_type_node);
7501 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7502 FIELD_DECL,
7503 get_identifier ("__va_next_o_limit"),
7504 ptr_type_node);
7505 f_next_fp = build_decl (BUILTINS_LOCATION,
7506 FIELD_DECL, get_identifier ("__va_next_fp"),
7507 ptr_type_node);
7508 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7509 FIELD_DECL,
7510 get_identifier ("__va_next_fp_limit"),
7511 ptr_type_node);
7512 f_next_stack = build_decl (BUILTINS_LOCATION,
7513 FIELD_DECL, get_identifier ("__va_next_stack"),
7514 ptr_type_node);
7516 DECL_FIELD_CONTEXT (f_next_o) = record;
7517 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7518 DECL_FIELD_CONTEXT (f_next_fp) = record;
7519 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7520 DECL_FIELD_CONTEXT (f_next_stack) = record;
7522 TYPE_STUB_DECL (record) = type_decl;
7523 TYPE_NAME (record) = type_decl;
7524 TYPE_FIELDS (record) = f_next_o;
7525 DECL_CHAIN (f_next_o) = f_next_o_limit;
7526 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7527 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7528 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7530 layout_type (record);
7532 return record;
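/* For illustration (a sketch, not an ABI statement): on SH2E/SH4 with the
   default (non-Renesas) ABI the record built above corresponds roughly to

     typedef struct __va_list_tag
     {
       void *__va_next_o;         next integer arg slot in the save area
       void *__va_next_o_limit;   end of the saved integer arg area
       void *__va_next_fp;        next FP arg slot in the save area
       void *__va_next_fp_limit;  end of the saved FP arg area
       void *__va_next_stack;     remaining args passed on the stack
     } va_list;

   For the other configurations handled above, va_list is a plain pointer.  */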
7535 /* Implement `va_start' for varargs and stdarg. */
7536 static void
7537 sh_va_start (tree valist, rtx nextarg)
7539 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7540 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7541 tree t, u;
7542 int nfp, nint;
7544 if ((! TARGET_SH2E && ! TARGET_SH4)
7545 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7547 std_expand_builtin_va_start (valist, nextarg);
7548 return;
7551 f_next_o = TYPE_FIELDS (va_list_type_node);
7552 f_next_o_limit = DECL_CHAIN (f_next_o);
7553 f_next_fp = DECL_CHAIN (f_next_o_limit);
7554 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7555 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7557 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7558 NULL_TREE);
7559 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7560 valist, f_next_o_limit, NULL_TREE);
7561 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7562 NULL_TREE);
7563 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7564 valist, f_next_fp_limit, NULL_TREE);
7565 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7566 valist, f_next_stack, NULL_TREE);
7568 /* Call __builtin_saveregs. */
7569 u = make_tree (sizetype, expand_builtin_saveregs ());
7570 u = fold_convert (ptr_type_node, u);
7571 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7572 TREE_SIDE_EFFECTS (t) = 1;
7573 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7575 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7576 if (nfp < 8)
7577 nfp = 8 - nfp;
7578 else
7579 nfp = 0;
7580 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7581 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7582 TREE_SIDE_EFFECTS (t) = 1;
7583 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7585 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7586 TREE_SIDE_EFFECTS (t) = 1;
7587 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7589 nint = crtl->args.info.arg_count[SH_ARG_INT];
7590 if (nint < 4)
7591 nint = 4 - nint;
7592 else
7593 nint = 0;
7594 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7595 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7596 TREE_SIDE_EFFECTS (t) = 1;
7597 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7599 u = make_tree (ptr_type_node, nextarg);
7600 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7601 TREE_SIDE_EFFECTS (t) = 1;
7602 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
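/* Worked example (a sketch; the concrete numbers assume SH4 with the default
   ABI, UNITS_PER_WORD == 4, four integer and eight FP argument registers):
   for  int f (int a, ...)  there is one named integer arg and no named FP
   arg, so nfp = 8 - 0 = 8 and nint = 4 - 1 = 3, giving

     next_fp       = regbuf (the __builtin_saveregs block)
     next_fp_limit = regbuf + 8 * 4 = regbuf + 32
     next_o        = next_fp_limit
     next_o_limit  = next_o + 3 * 4 = next_o + 12
     next_stack    = nextarg (first anonymous arg passed on the stack).  */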
7605 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7606 member, return it. */
7607 static tree
7608 find_sole_member (tree type)
7610 tree field, member = NULL_TREE;
7612 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7614 if (TREE_CODE (field) != FIELD_DECL)
7615 continue;
7616 if (!DECL_SIZE (field))
7617 return NULL_TREE;
7618 if (integer_zerop (DECL_SIZE (field)))
7619 continue;
7620 if (member)
7621 return NULL_TREE;
7622 member = field;
7624 return member;
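/* Example (illustration only): for  struct wrap { double d; }  this returns
   the FIELD_DECL for 'd', so sh_gimplify_va_arg_expr below treats the struct
   like a plain double.  A struct with two nonzero-sized fields, or one
   containing a field without a known size, yields NULL_TREE.  */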
7627 /* Implement `va_arg'. */
7628 static tree
7629 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7630 gimple_seq *post_p ATTRIBUTE_UNUSED)
7632 tree tmp;
7633 tree addr, lab_over = NULL, result = NULL;
7634 tree eff_type;
7636 const bool pass_by_ref =
7637 !VOID_TYPE_P (type)
7638 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7640 if (pass_by_ref)
7641 type = build_pointer_type (type);
7643 HOST_WIDE_INT size = int_size_in_bytes (type);
7644 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7645 tree pptr_type_node = build_pointer_type (ptr_type_node);
7647 if ((TARGET_SH2E || TARGET_SH4)
7648 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7650 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7651 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7652 tree lab_false;
7653 tree member;
7655 f_next_o = TYPE_FIELDS (va_list_type_node);
7656 f_next_o_limit = DECL_CHAIN (f_next_o);
7657 f_next_fp = DECL_CHAIN (f_next_o_limit);
7658 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7659 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7661 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7662 NULL_TREE);
7663 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7664 valist, f_next_o_limit, NULL_TREE);
7665 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7666 valist, f_next_fp, NULL_TREE);
7667 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7668 valist, f_next_fp_limit, NULL_TREE);
7669 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7670 valist, f_next_stack, NULL_TREE);
7672 /* Structures with a single member with a distinct mode are passed
7673 like their member. This is relevant if the latter has a REAL_TYPE
7674 or COMPLEX_TYPE type. */
7675 eff_type = type;
7676 while (TREE_CODE (eff_type) == RECORD_TYPE
7677 && (member = find_sole_member (eff_type))
7678 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7679 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7680 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7682 tree field_type = TREE_TYPE (member);
7684 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7685 eff_type = field_type;
7686 else
7688 gcc_assert ((TYPE_ALIGN (eff_type)
7689 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7690 || (TYPE_ALIGN (eff_type)
7691 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7692 break;
7696 bool pass_as_float;
7697 if (TARGET_FPU_DOUBLE)
7699 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7700 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7701 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7702 && size <= 16));
7704 else
7706 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7709 addr = create_tmp_var (pptr_type_node);
7710 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7711 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7713 valist = build_simple_mem_ref (addr);
7715 if (pass_as_float)
7717 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7718 tree cmp;
7719 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7721 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7722 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7724 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7725 tmp = next_fp_limit;
7726 if (size > 4 && !is_double)
7727 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7728 tmp = build2 (GE_EXPR, boolean_type_node,
7729 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7730 cmp = build3 (COND_EXPR, void_type_node, tmp,
7731 build1 (GOTO_EXPR, void_type_node,
7732 unshare_expr (lab_false)), NULL_TREE);
7733 if (!is_double)
7734 gimplify_and_add (cmp, pre_p);
7736 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7737 || (is_double || size == 16))
7739 tmp = fold_convert (sizetype, next_fp_tmp);
7740 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7741 size_int (UNITS_PER_WORD));
7742 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7743 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7745 if (is_double)
7746 gimplify_and_add (cmp, pre_p);
7748 #ifdef FUNCTION_ARG_SCmode_WART
7749 if (TYPE_MODE (eff_type) == SCmode
7750 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7752 tree subtype = TREE_TYPE (eff_type);
7753 tree real, imag;
7755 imag
7756 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7757 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7759 real
7760 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7761 real = get_initialized_tmp_var (real, pre_p, NULL);
7763 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7764 if (type != eff_type)
7765 result = build1 (VIEW_CONVERT_EXPR, type, result);
7766 result = get_initialized_tmp_var (result, pre_p, NULL);
7768 #endif /* FUNCTION_ARG_SCmode_WART */
7770 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7771 gimplify_and_add (tmp, pre_p);
7773 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7774 gimplify_and_add (tmp, pre_p);
7776 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7777 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7778 gimplify_assign (unshare_expr (next_fp_tmp),
7779 unshare_expr (valist), pre_p);
7781 gimplify_assign (unshare_expr (valist),
7782 unshare_expr (next_fp_tmp), post_p);
7783 valist = next_fp_tmp;
7785 else
7787 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7788 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7789 unshare_expr (next_o_limit));
7790 tmp = build3 (COND_EXPR, void_type_node, tmp,
7791 build1 (GOTO_EXPR, void_type_node,
7792 unshare_expr (lab_false)),
7793 NULL_TREE);
7794 gimplify_and_add (tmp, pre_p);
7796 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7797 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7799 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7800 gimplify_and_add (tmp, pre_p);
7802 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7803 gimplify_and_add (tmp, pre_p);
7805 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7806 gimplify_assign (unshare_expr (next_o),
7807 unshare_expr (next_o_limit), pre_p);
7809 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7810 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7813 if (!result)
7815 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7816 gimplify_and_add (tmp, pre_p);
7820 /* ??? In va-sh.h, there had been code to make values larger than
7821 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7823 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7824 if (result)
7826 gimplify_assign (result, tmp, pre_p);
7827 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7828 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7829 gimplify_and_add (tmp, pre_p);
7831 else
7832 result = tmp;
7834 if (pass_by_ref)
7835 result = build_va_arg_indirect_ref (result);
7837 return result;
7840 /* 64-bit floating point memory transfers are paired single precision loads
7841 or stores. So DWARF information needs fixing in little endian (unless
7842 PR=SZ=1 in FPSCR). */
7843 static rtx
7844 sh_dwarf_register_span (rtx reg)
7846 unsigned regno = REGNO (reg);
7848 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7849 return NULL_RTX;
7851 return
7852 gen_rtx_PARALLEL (VOIDmode,
7853 gen_rtvec (2,
7854 gen_rtx_REG (SFmode, regno + 1),
7855 gen_rtx_REG (SFmode, regno)));
7858 static machine_mode
7859 sh_promote_function_mode (const_tree type, machine_mode mode,
7860 int *punsignedp, const_tree funtype,
7861 int for_return)
7863 if (sh_promote_prototypes (funtype))
7864 return promote_mode (type, mode, punsignedp);
7865 else
7866 return default_promote_function_mode (type, mode, punsignedp, funtype,
7867 for_return);
7870 static bool
7871 sh_promote_prototypes (const_tree type)
7873 if (TARGET_HITACHI)
7874 return false;
7875 if (! type)
7876 return true;
7877 return ! sh_attr_renesas_p (type);
7880 static bool
7881 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7882 const_tree type, bool named ATTRIBUTE_UNUSED)
7884 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7886 if (targetm.calls.must_pass_in_stack (mode, type))
7887 return true;
7889 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7890 wants to know about pass-by-reference semantics for incoming
7891 arguments. */
7892 if (! cum)
7893 return false;
7895 return false;
7898 static bool
7899 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7900 const_tree type, bool named ATTRIBUTE_UNUSED)
7902 /* ??? How can it possibly be correct to return true only on the
7903 caller side of the equation? Is there someplace else in the
7904 sh backend that's magically producing the copies? */
7905 return (get_cumulative_args (cum)->outgoing
7906 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7907 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7910 static sh_arg_class
7911 get_sh_arg_class (machine_mode mode)
7913 if (TARGET_FPU_ANY && mode == SFmode)
7914 return SH_ARG_FLOAT;
7916 if (TARGET_FPU_DOUBLE
7917 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7918 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7919 return SH_ARG_FLOAT;
7921 return SH_ARG_INT;
7924 /* Round a register number up to a proper boundary for an arg of mode
7925 MODE.
7926 The SH doesn't care about double alignment, so we only
7927 round doubles to even regs when asked to explicitly. */
7928 static int
7929 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7931 /* FIXME: This used to be a macro and has been copy pasted into this
7932 function as is. Make this more readable. */
7933 return
7934 (((TARGET_ALIGN_DOUBLE
7935 || (TARGET_FPU_DOUBLE
7936 && (mode == DFmode || mode == DCmode)
7937 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7938 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7939 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7940 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7941 : cum.arg_count[(int) get_sh_arg_class (mode)]);
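/* Worked example (sketch): with TARGET_ALIGN_DOUBLE, or for a DFmode FP
   argument that still fits in the FP argument registers, an argument wider
   than a word is rounded up to an even slot.  If three FP args have already
   been assigned (arg_count == 3), a following DFmode arg starts at slot
   3 + (3 & 1) = 4 instead of slot 3.  Word-sized args are never rounded.  */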
7944 /* Return true if arg of the specified mode should be passed in a register
7945 or false otherwise. */
7946 static bool
7947 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7948 const_tree type)
7950 /* FIXME: This used to be a macro and has been copy pasted into this
7951 function as is. Make this more readable. */
7952 return
7953 ((type == 0
7954 || (! TREE_ADDRESSABLE (type)
7955 && (! (TARGET_HITACHI || cum.renesas_abi)
7956 || ! (AGGREGATE_TYPE_P (type)
7957 || (!TARGET_FPU_ANY
7958 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7959 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7960 && ! cum.force_mem
7961 && (TARGET_SH2E
7962 ? ((mode) == BLKmode
7963 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7964 + int_size_in_bytes (type))
7965 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7966 : ((sh_round_reg (cum, mode)
7967 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
7968 <= NPARM_REGS (mode)))
7969 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7972 static int
7973 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
7974 tree type, bool named ATTRIBUTE_UNUSED)
7976 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7977 int words = 0;
7979 if (sh_pass_in_reg_p (*cum, mode, type)
7980 && !TARGET_FPU_DOUBLE
7981 && (sh_round_reg (*cum, mode)
7982 + (mode != BLKmode
7983 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
7984 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
7985 > NPARM_REGS (mode)))
7986 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
7988 return words * UNITS_PER_WORD;
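/* Worked example (sketch): on SH2 (no FPU) the integer argument registers
   are r4..r7, so NPARM_REGS == 4.  If a DImode argument starts at slot 3
   (its first word lands in r7), then 3 + 2 > 4 and words = 4 - 3 = 1: this
   hook returns 4, meaning 4 bytes travel in r7 and 4 bytes on the stack.  */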
7992 /* Define where to put the arguments to a function.
7993 Value is zero to push the argument on the stack,
7994 or a hard register in which to store the argument.
7996 MODE is the argument's machine mode.
7997 TYPE is the data type of the argument (as a tree).
7998 This is null for libcalls where that information may
7999 not be available.
8000 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8001 the preceding args and about the function being called.
8002 NAMED is nonzero if this argument is a named parameter
8003 (otherwise it is an extra parameter matching an ellipsis).
8005 On SH the first args are normally in registers
8006 and the rest are pushed. Any arg that starts within the first
8007 NPARM_REGS words is at least partially passed in a register unless
8008 its data type forbids. */
8009 static rtx
8010 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
8011 const_tree type, bool named)
8013 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8015 if (mode == VOIDmode)
8016 return ca->renesas_abi ? const1_rtx : const0_rtx;
8018 if (sh_pass_in_reg_p (*ca, mode, type)
8019 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8021 int regno;
8023 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8024 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8026 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8027 gen_rtx_REG (SFmode,
8028 BASE_ARG_REG (mode)
8029 + (sh_round_reg (*ca, mode) ^ 1)),
8030 const0_rtx);
8031 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8032 gen_rtx_REG (SFmode,
8033 BASE_ARG_REG (mode)
8034 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8035 GEN_INT (4));
8036 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
8039 /* If the alignment of a DF value causes an SF register to be
8040 skipped, we will use that skipped register for the next SF
8041 value. */
8042 if ((TARGET_HITACHI || ca->renesas_abi)
8043 && ca->free_single_fp_reg
8044 && mode == SFmode)
8045 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8047 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8048 ^ (mode == SFmode && TARGET_SH4
8049 && TARGET_LITTLE_ENDIAN
8050 && ! TARGET_HITACHI && ! ca->renesas_abi);
8051 return gen_rtx_REG (mode, regno);
8055 return NULL_RTX;
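/* Example (sketch): with the default ABI the first four integer argument
   words go in r4..r7 and the rest on the stack, so for
     int f (int a, int b, int c, int d, int e);
   this hook returns r4, r5, r6 and r7 for a..d and NULL_RTX for e, which
   the caller then pushes on the stack.  */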
8058 /* Update the data in CUM to advance over an argument
8059 of mode MODE and data type TYPE.
8060 (TYPE is null for libcalls where that information may not be
8061 available.) */
8062 static void
8063 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
8064 const_tree type, bool named ATTRIBUTE_UNUSED)
8066 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8068 if (ca->force_mem)
8069 ca->force_mem = false;
8071 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8073 /* Note that we've used the skipped register. */
8074 if (mode == SFmode && ca->free_single_fp_reg)
8076 ca->free_single_fp_reg = 0;
8077 return;
8079 /* When we have a DF after an SF, there's an SF register that gets
8080 skipped in order to align the DF value. We note this skipped
8081 register, because the next SF value will use it, and not the
8082 SF that follows the DF. */
8083 if (mode == DFmode
8084 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8086 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8087 + BASE_ARG_REG (mode));
8091 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8092 || sh_pass_in_reg_p (*ca, mode, type))
8093 (ca->arg_count[(int) get_sh_arg_class (mode)]
8094 = (sh_round_reg (*ca, mode)
8095 + (mode == BLKmode
8096 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8097 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
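/* Example (sketch) of the skipped-register bookkeeping above: under the
   Renesas ABI with a double-precision FPU, for  f (float a, double b,
   float c)  the float 'a' occupies one single-precision slot, 'b' is then
   rounded up to an even register pair, skipping one single-precision slot;
   free_single_fp_reg records that slot so that 'c' is placed there rather
   than after 'b'.  */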
8100 /* The Renesas calling convention doesn't quite fit into this scheme since
8101 the address is passed like an invisible argument, but one that is always
8102 passed in memory. */
8103 static rtx
8104 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8106 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8107 return NULL_RTX;
8108 return gen_rtx_REG (Pmode, 2);
8111 /* Worker function for TARGET_FUNCTION_VALUE.
8113 For the SH, this is like LIBCALL_VALUE, except that we must change the
8114 mode like PROMOTE_MODE does.
8115 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8116 tested here has to be kept in sync with the one in
8117 explow.c:promote_mode. */
8118 static rtx
8119 sh_function_value (const_tree valtype,
8120 const_tree fn_decl_or_type,
8121 bool outgoing ATTRIBUTE_UNUSED)
8123 if (fn_decl_or_type
8124 && !DECL_P (fn_decl_or_type))
8125 fn_decl_or_type = NULL;
8127 return gen_rtx_REG (
8128 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8129 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8130 && (TREE_CODE (valtype) == INTEGER_TYPE
8131 || TREE_CODE (valtype) == ENUMERAL_TYPE
8132 || TREE_CODE (valtype) == BOOLEAN_TYPE
8133 || TREE_CODE (valtype) == REAL_TYPE
8134 || TREE_CODE (valtype) == OFFSET_TYPE))
8135 && sh_promote_prototypes (fn_decl_or_type)
8136 ? SImode : TYPE_MODE (valtype)),
8137 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8140 /* Worker function for TARGET_LIBCALL_VALUE. */
8141 static rtx
8142 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8144 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8147 /* Return true if N is a possible register number of function value. */
8148 static bool
8149 sh_function_value_regno_p (const unsigned int regno)
8151 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8154 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8155 static bool
8156 sh_return_in_memory (const_tree type, const_tree fndecl)
8158 return TYPE_MODE (type) == BLKmode
8159 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8160 && TREE_CODE (type) == RECORD_TYPE);
8163 /* We actually emit the code in sh_expand_prologue. We used to use
8164 a static variable to flag that we need to emit this code, but that
8165 doesn't when inlining, when functions are deferred and then emitted
8166 later. Fortunately, we already have two flags that are part of struct
8167 function that tell if a function uses varargs or stdarg. */
8168 static void
8169 sh_setup_incoming_varargs (cumulative_args_t ca,
8170 machine_mode mode,
8171 tree type,
8172 int *pretend_arg_size,
8173 int second_time ATTRIBUTE_UNUSED)
8175 gcc_assert (cfun->stdarg);
8176 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8178 int named_parm_regs, anon_parm_regs;
8180 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
8181 + (mode == BLKmode
8182 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8183 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
8184 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8185 if (anon_parm_regs > 0)
8186 *pretend_arg_size = anon_parm_regs * 4;
8190 static bool
8191 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8193 return false;
8196 static bool
8197 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8199 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8201 return ! (TARGET_HITACHI || ca->renesas_abi);
8205 /* Define the offset between two registers, one to be eliminated, and
8206 the other its replacement, at the start of a routine. */
8207 int
8208 initial_elimination_offset (int from, int to)
8210 const int regs_saved_rounding = 0;
8211 int save_flags = target_flags;
8212 HARD_REG_SET live_regs_mask;
8214 int regs_saved = calc_live_regs (&live_regs_mask);
8216 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8217 target_flags = save_flags;
8219 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8221 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8222 return total_saved_regs_space + total_auto_space;
8224 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8225 return total_saved_regs_space + total_auto_space;
8227 /* Initial gap between fp and sp is 0. */
8228 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8229 return 0;
8231 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8232 return rounded_frame_size (0);
8234 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8235 return rounded_frame_size (0);
8237 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8238 && (to == HARD_FRAME_POINTER_REGNUM
8239 || to == STACK_POINTER_REGNUM));
8240 return total_auto_space;
8243 /* Parse the -mfixed-range= option string. */
8244 void
8245 sh_fix_range (const char *const_str)
8247 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8248 REG2 are either register names or register numbers. The effect
8249 of this option is to mark the registers in the range from REG1 to
8250 REG2 as ``fixed'' so they won't be used by the compiler. */
8252 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8254 while (1)
8256 char* dash = strchr (str, '-');
8257 if (!dash)
8259 warning (0, "value of -mfixed-range must have form REG1-REG2");
8260 return;
8262 *dash = '\0';
8263 char* comma = strchr (dash + 1, ',');
8264 if (comma)
8265 *comma = '\0';
8267 int first = decode_reg_name (str);
8268 if (first < 0)
8270 warning (0, "unknown register name: %s", str);
8271 return;
8274 int last = decode_reg_name (dash + 1);
8275 if (last < 0)
8277 warning (0, "unknown register name: %s", dash + 1);
8278 return;
8281 *dash = '-';
8283 if (first > last)
8285 warning (0, "%s-%s is an empty range", str, dash + 1);
8286 return;
8289 for (int i = first; i <= last; ++i)
8290 fixed_regs[i] = call_used_regs[i] = 1;
8292 if (!comma)
8293 break;
8295 *comma = ',';
8296 str = comma + 1;
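/* Usage example (sketch; the register names are arbitrary choices):
     -mfixed-range=r8-r9,r10-r11
   marks r8, r9, r10 and r11 as fixed and call-used, so the register
   allocator never uses them.  A value such as "r8" without a dash only
   produces the warning above.  */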
8300 /* Insert any deferred function attributes from earlier pragmas. */
8301 static void
8302 sh_insert_attributes (tree node, tree *attributes)
8304 if (TREE_CODE (node) != FUNCTION_DECL)
8305 return;
8307 /* We are only interested in fields. */
8308 if (!DECL_P (node))
8309 return;
8311 /* Append the attributes to the deferred attributes. */
8312 *sh_deferred_function_attributes_tail = *attributes;
8313 tree attrs = sh_deferred_function_attributes;
8314 if (!attrs)
8315 return;
8317 /* Some attributes imply or require the interrupt attribute. */
8318 if (!lookup_attribute ("interrupt_handler", attrs)
8319 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8321 /* If we have a trapa_handler, but no interrupt_handler attribute,
8322 insert an interrupt_handler attribute. */
8323 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8324 /* We can't use sh_pr_interrupt here because that's not in the
8325 java frontend. */
8326 attrs
8327 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8328 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8329 if the interrupt attribute is missing, we ignore the attribute
8330 and warn. */
8331 else if (lookup_attribute ("sp_switch", attrs)
8332 || lookup_attribute ("trap_exit", attrs)
8333 || lookup_attribute ("nosave_low_regs", attrs)
8334 || lookup_attribute ("resbank", attrs))
8336 tree *tail;
8338 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8340 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8341 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8342 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8343 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8344 warning (OPT_Wattributes,
8345 "%qE attribute only applies to interrupt functions",
8346 TREE_PURPOSE (attrs));
8347 else
8349 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8350 NULL_TREE);
8351 tail = &TREE_CHAIN (*tail);
8354 attrs = *attributes;
8358 /* Install the processed list. */
8359 *attributes = attrs;
8361 /* Clear deferred attributes. */
8362 sh_deferred_function_attributes = NULL_TREE;
8363 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8365 return;
8368 /*------------------------------------------------------------------------------
8369 Target specific attributes
8370 Supported attributes are:
8372 * interrupt_handler
8373 Specifies this function is an interrupt handler.
8375 * trapa_handler
8376 Like interrupt_handler, but don't save all registers.
8378 * sp_switch
8379 Specifies an alternate stack for an interrupt handler to run on.
8381 * trap_exit
8382 Use a trapa to exit an interrupt function instead of rte.
8384 * nosave_low_regs
8385 Don't save r0..r7 in an interrupt handler function.
8386 This is useful on SH3* and SH4*, which have a separate set of low
8387 regs for user and privileged modes.
8388 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8389 those that run with interrupts disabled and thus can't be
8390 interrupted themselves).
8392 * renesas
8393 Use Renesas calling/layout conventions (functions and structures).
8395 * resbank
8396 In case of an interrupt handler function, use a register bank to
8397 save registers R0-R14, MACH, MACL, GBR and PR.
8398 This is available only on SH2A targets.
8400 * function_vector
8401 Declares a function to be called using the TBR relative addressing
8402 mode. Takes an argument that specifies the slot number in the table
8403 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
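/* Usage sketch for the attributes above (the names and numbers are
   arbitrary examples):

     void isr (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       nosave_low_regs));
     void isr2 (void) __attribute__ ((interrupt_handler, trap_exit (11)));
     void bank_isr (void) __attribute__ ((interrupt_handler, resbank));
     void tbr_fn (void) __attribute__ ((function_vector (5)));

   where alt_stack names a variable holding the alternate stack pointer,
   11 is the trapa number used at function exit, and 5 is the TBR table
   slot.  */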
8406 /* Handle a 'resbank' attribute. */
8407 static tree
8408 sh_handle_resbank_handler_attribute (tree * node, tree name,
8409 tree args ATTRIBUTE_UNUSED,
8410 int flags ATTRIBUTE_UNUSED,
8411 bool * no_add_attrs)
8413 if (!TARGET_SH2A)
8415 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8416 name);
8417 *no_add_attrs = true;
8419 if (TREE_CODE (*node) != FUNCTION_DECL)
8421 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8422 name);
8423 *no_add_attrs = true;
8426 return NULL_TREE;
8429 /* Handle an "interrupt_handler" attribute; arguments as in
8430 struct attribute_spec.handler. */
8431 static tree
8432 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8433 tree args ATTRIBUTE_UNUSED,
8434 int flags ATTRIBUTE_UNUSED,
8435 bool *no_add_attrs)
8437 if (TREE_CODE (*node) != FUNCTION_DECL)
8439 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8440 name);
8441 *no_add_attrs = true;
8444 return NULL_TREE;
8447 /* Handle a 'function_vector' attribute; arguments as in
8448 struct attribute_spec.handler. */
8449 static tree
8450 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8451 tree args ATTRIBUTE_UNUSED,
8452 int flags ATTRIBUTE_UNUSED,
8453 bool * no_add_attrs)
8455 if (!TARGET_SH2A)
8457 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8458 name);
8459 *no_add_attrs = true;
8461 else if (TREE_CODE (*node) != FUNCTION_DECL)
8463 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8464 name);
8465 *no_add_attrs = true;
8467 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8469 /* The argument must be a constant integer. */
8470 warning (OPT_Wattributes,
8471 "%qE attribute argument not an integer constant",
8472 name);
8473 *no_add_attrs = true;
8475 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8477 /* The argument value must be between 0 and 255. */
8478 warning (OPT_Wattributes,
8479 "%qE attribute argument should be between 0 to 255",
8480 name);
8481 *no_add_attrs = true;
8483 return NULL_TREE;
8486 /* Returns true if the given call target X has been assigned the
8487 attribute 'function_vector'. */
8488 bool
8489 sh2a_is_function_vector_call (rtx x)
8491 if (GET_CODE (x) == SYMBOL_REF
8492 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8494 tree tr = SYMBOL_REF_DECL (x);
8496 if (sh2a_function_vector_p (tr))
8497 return true;
8500 return false;
8503 /* Returns the function vector number, if the attribute
8504 'function_vector' is assigned, otherwise returns zero. */
8505 int
8506 sh2a_get_function_vector_number (rtx x)
8508 if ((GET_CODE (x) == SYMBOL_REF)
8509 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8511 tree t = SYMBOL_REF_DECL (x);
8513 if (TREE_CODE (t) != FUNCTION_DECL)
8514 return 0;
8516 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8517 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8518 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8520 return 0;
8522 else
8523 return 0;
8526 /* Handle an "sp_switch" attribute; arguments as in
8527 struct attribute_spec.handler. */
8528 static tree
8529 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8530 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8532 if (TREE_CODE (*node) != FUNCTION_DECL)
8534 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8535 name);
8536 *no_add_attrs = true;
8538 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8540 /* The argument must be a constant string. */
8541 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8542 name);
8543 *no_add_attrs = true;
8546 return NULL_TREE;
8549 /* Handle an "trap_exit" attribute; arguments as in
8550 struct attribute_spec.handler. */
8551 static tree
8552 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8553 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8555 if (TREE_CODE (*node) != FUNCTION_DECL)
8557 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8558 name);
8559 *no_add_attrs = true;
8561 /* The argument specifies a trap number to be used in a trapa instruction
8562 at function exit (instead of an rte instruction). */
8563 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8565 /* The argument must be a constant integer. */
8566 warning (OPT_Wattributes, "%qE attribute argument not an "
8567 "integer constant", name);
8568 *no_add_attrs = true;
8571 return NULL_TREE;
8574 static tree
8575 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8576 tree name ATTRIBUTE_UNUSED,
8577 tree args ATTRIBUTE_UNUSED,
8578 int flags ATTRIBUTE_UNUSED,
8579 bool *no_add_attrs ATTRIBUTE_UNUSED)
8581 return NULL_TREE;
8584 /* True if __attribute__((renesas)) or -mrenesas. */
8585 bool
8586 sh_attr_renesas_p (const_tree td)
8588 if (TARGET_HITACHI)
8589 return true;
8590 if (td == NULL_TREE)
8591 return false;
8592 if (DECL_P (td))
8593 td = TREE_TYPE (td);
8594 if (td == error_mark_node)
8595 return false;
8596 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8599 /* True if __attribute__((renesas)) or -mrenesas, for the current
8600 function. */
8601 bool
8602 sh_cfun_attr_renesas_p (void)
8604 return sh_attr_renesas_p (current_function_decl);
8607 /* Returns true if the current function has the "interrupt_handler"
8608 attribute set. */
8609 bool
8610 sh_cfun_interrupt_handler_p (void)
8612 return (lookup_attribute ("interrupt_handler",
8613 DECL_ATTRIBUTES (current_function_decl))
8614 != NULL_TREE);
8617 /* Returns true if FUNC has been assigned the attribute
8618 "function_vector". */
8619 bool
8620 sh2a_function_vector_p (tree func)
8622 if (TREE_CODE (func) != FUNCTION_DECL)
8623 return false;
8625 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8626 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8627 return true;
8629 return false;
8632 /* Returns true if the current function has the "resbank" attribute set. */
8633 bool
8634 sh_cfun_resbank_handler_p (void)
8636 return ((lookup_attribute ("resbank",
8637 DECL_ATTRIBUTES (current_function_decl))
8638 != NULL_TREE)
8639 && (lookup_attribute ("interrupt_handler",
8640 DECL_ATTRIBUTES (current_function_decl))
8641 != NULL_TREE) && TARGET_SH2A);
8644 /* Returns true if the current function has a "trap_exit" attribute set. */
8645 bool
8646 sh_cfun_trap_exit_p (void)
8648 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8649 != NULL_TREE;
8652 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8653 static const char *
8654 sh_check_pch_target_flags (int old_flags)
8656 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8657 | MASK_SH_E | MASK_HARD_SH4
8658 | MASK_FPU_SINGLE | MASK_SH4))
8659 return _("created and used with different architectures / ABIs");
8660 if ((old_flags ^ target_flags) & MASK_HITACHI)
8661 return _("created and used with different ABIs");
8662 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8663 return _("created and used with different endianness");
8664 return NULL;
8667 /* Predicates used by the templates. */
8669 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8670 Used only in general_movsrc_operand. */
8671 bool
8672 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8674 switch (REGNO (op))
8676 case PR_REG:
8677 case MACL_REG:
8678 case MACH_REG:
8679 return true;
8681 return false;
8684 /* Returns true if OP is a floating point value with value 0.0. */
8685 bool
8686 fp_zero_operand (rtx op)
8688 if (GET_MODE (op) != SFmode)
8689 return false;
8691 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8692 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8695 /* Returns true if OP is a floating point value with value 1.0. */
8696 bool
8697 fp_one_operand (rtx op)
8699 if (GET_MODE (op) != SFmode)
8700 return false;
8702 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8705 /* Return the TLS type for TLS symbols. */
8706 enum tls_model
8707 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8709 if (GET_CODE (op) != SYMBOL_REF)
8710 return TLS_MODEL_NONE;
8711 return SYMBOL_REF_TLS_MODEL (op);
8714 /* Return the destination address of a branch. */
8715 static int
8716 branch_dest (rtx branch)
8718 rtx dest = SET_SRC (PATTERN (branch));
8720 if (GET_CODE (dest) == IF_THEN_ELSE)
8721 dest = XEXP (dest, 1);
8723 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8726 /* Return nonzero if REG is not used after INSN.
8727 We assume REG is a reload reg, and therefore does
8728 not live past labels. It may live past calls or jumps though. */
8729 bool
8730 reg_unused_after (rtx reg, rtx_insn *insn)
8732 /* If the reg is set by this instruction, then it is safe for our
8733 case. Disregard the case where this is a store to memory, since
8734 we are checking a register used in the store address. */
8735 rtx set = single_set (insn);
8736 if (set && !MEM_P (SET_DEST (set))
8737 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8738 return true;
8740 while ((insn = NEXT_INSN (insn)))
8742 if (!INSN_P (insn))
8743 continue;
8745 rtx_code code = GET_CODE (insn);
8747 #if 0
8748 /* If this is a label that existed before reload, then the register
8749 is dead here. However, if this is a label added by reorg, then
8750 the register may still be live here. We can't tell the difference,
8751 so we just ignore labels completely. */
8752 if (code == CODE_LABEL)
8753 return 1;
8754 /* else */
8755 #endif
8757 if (code == JUMP_INSN)
8758 return false;
8760 /* If this is a sequence, we must handle them all at once.
8761 We could have for instance a call that sets the target register,
8762 and an insn in a delay slot that uses the register. In this case,
8763 we must return 0. */
8764 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8766 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8767 bool retval = false;
8769 for (int i = 0; i < seq->len (); i++)
8771 rtx_insn *this_insn = seq->insn (i);
8772 rtx set = single_set (this_insn);
8774 if (CALL_P (this_insn))
8775 code = CALL_INSN;
8776 else if (JUMP_P (this_insn))
8778 if (INSN_ANNULLED_BRANCH_P (this_insn))
8779 return false;
8780 code = JUMP_INSN;
8783 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8784 return false;
8785 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8787 if (!MEM_P (SET_DEST (set)))
8788 retval = true;
8789 else
8790 return false;
8792 if (set == NULL_RTX
8793 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8794 return false;
8796 if (retval)
8797 return true;
8798 else if (code == JUMP_INSN)
8799 return false;
8802 rtx set = single_set (insn);
8803 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8804 return false;
8805 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8806 return !MEM_P (SET_DEST (set));
8807 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8808 return false;
8810 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8811 return true;
8813 return true;
8817 static GTY(()) rtx t_reg_rtx;
8818 rtx
8819 get_t_reg_rtx (void)
8821 if (! t_reg_rtx)
8822 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8823 return t_reg_rtx;
8826 static GTY(()) tree fpscr_values;
8828 static void
8829 emit_fpu_switch (rtx scratch, int index)
8831 if (fpscr_values == NULL)
8833 tree t = build_index_type (integer_one_node);
8834 t = build_array_type (integer_type_node, t);
8835 t = build_decl (BUILTINS_LOCATION,
8836 VAR_DECL, get_identifier ("__fpscr_values"), t);
8837 DECL_ARTIFICIAL (t) = 1;
8838 DECL_IGNORED_P (t) = 1;
8839 DECL_EXTERNAL (t) = 1;
8840 TREE_STATIC (t) = 1;
8841 TREE_PUBLIC (t) = 1;
8842 TREE_USED (t) = 1;
8844 fpscr_values = t;
8847 rtx src = DECL_RTL (fpscr_values);
8848 if (!can_create_pseudo_p ())
8850 emit_move_insn (scratch, XEXP (src, 0));
8851 if (index != 0)
8852 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8853 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8855 else
8856 src = adjust_address (src, SImode, index * 4);
8858 emit_insn (gen_lds_fpscr (src));
8861 static rtx get_free_reg (HARD_REG_SET);
8863 /* This function returns a register that can be used to load the address
8864 from which to load the fpscr. Currently it always returns r1 or r7, but
8865 when we are able to use pseudo registers after combine, or have a better
8866 mechanism for choosing a register, it should be done here. */
8867 /* REGS_LIVE is the liveness information for the point for which we
8868 need this allocation. In some bare-bones exit blocks, r1 is live at the
8869 start. We can even have all of r0..r3 being live:
8870 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8871 The INSN before which new insns are placed will clobber the register
8872 we return. If a basic block consists only of setting the return value
8873 register to a pseudo and using that register, the return value is not
8874 live before or after this block, yet we'll insert our insns right in
8875 the middle. */
8876 static rtx
8877 get_free_reg (HARD_REG_SET regs_live)
8879 if (! TEST_HARD_REG_BIT (regs_live, 1))
8880 return gen_rtx_REG (Pmode, 1);
8882 /* Hard reg 1 is live; since this is a small register classes target,
8883 there shouldn't be anything but a jump before the function end. */
8884 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8885 return gen_rtx_REG (Pmode, 7);
8888 /* This function will set the fpscr from memory.
8889 MODE is the mode we are setting it to. */
8890 void
8891 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8893 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8894 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8896 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8897 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8900 /* Is the given character a logical line separator for the assembler? */
8901 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8902 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8903 #endif
8905 static bool
8906 sequence_insn_p (rtx_insn *insn)
8908 rtx_insn* prev = PREV_INSN (insn);
8909 if (prev == NULL)
8910 return false;
8912 rtx_insn* next = NEXT_INSN (prev);
8913 if (next == NULL)
8914 return false;
8916 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8919 int
8920 sh_insn_length_adjustment (rtx_insn *insn)
8922 /* Instructions with unfilled delay slots take up an extra two bytes for
8923 the nop in the delay slot. */
8924 if (((NONJUMP_INSN_P (insn)
8925 && GET_CODE (PATTERN (insn)) != USE
8926 && GET_CODE (PATTERN (insn)) != CLOBBER)
8927 || CALL_P (insn) || JUMP_P (insn))
8928 && ! sequence_insn_p (insn)
8929 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8930 return 2;
8932 /* Increase the insn length of a cbranch without a delay slot insn to
8933 force a delay slot which will be stuffed with a nop. */
8934 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8935 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8936 && ! sequence_insn_p (insn))
8937 return 2;
8939 /* sh-dsp parallel processing insns take four bytes instead of two. */
8941 if (NONJUMP_INSN_P (insn))
8943 int sum = 0;
8944 rtx body = PATTERN (insn);
8945 const char *templ;
8946 char c;
8947 bool maybe_label = true;
8949 if (GET_CODE (body) == ASM_INPUT)
8950 templ = XSTR (body, 0);
8951 else if (asm_noperands (body) >= 0)
8952 templ
8953 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8954 else
8955 return 0;
8958 int ppi_adjust = 0;
8961 c = *templ++;
8962 while (c == ' ' || c == '\t');
8963 /* all sh-dsp parallel-processing insns start with p.
8964 The only non-ppi sh insn starting with p is pref.
8965 The only ppi starting with pr is prnd. */
8966 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8967 ppi_adjust = 2;
8968 /* The repeat pseudo-insn expands to three insns, a total of
8969 six bytes in size. */
8970 else if ((c == 'r' || c == 'R')
8971 && ! strncasecmp ("epeat", templ, 5))
8972 ppi_adjust = 4;
8973 while (c && c != '\n'
8974 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8976 /* If this is a label, it is obviously not a ppi insn. */
8977 if (c == ':' && maybe_label)
8979 ppi_adjust = 0;
8980 break;
8982 else if (c == '\'' || c == '"')
8983 maybe_label = false;
8984 c = *templ++;
8986 sum += ppi_adjust;
8987 maybe_label = c != ':';
8989 while (c);
8990 return sum;
8992 return 0;
8995 /* Return TRUE for a valid displacement for the REG+disp addressing
8996 with MODE. */
8997 bool
8998 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
8999 bool allow_zero)
9001 if (! CONST_INT_P (op))
9002 return false;
9005 const HOST_WIDE_INT offset = INTVAL (op);
9006 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9007 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9009 /* If the mode does not support any displacement always return false.
9010 Even though an index of '0' is actually always valid, it will cause
9011 troubles when e.g. a DFmode move is split into two SFmode moves,
9012 where one SFmode move will have index '0' and the other move will
9013 have index '4'. */
9014 if (!allow_zero && max_disp < 1)
9015 return false;
9017 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
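/* Example (sketch, for the standard non-SH2A displacement insns): an SImode
   access allows displacements 0..60 in multiples of 4, so offset 44 is
   accepted, while 45 (misaligned) and 64 (out of range) are rejected.  A
   QImode access accepts any offset in 0..15.  */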
9021 /* Recognize an RTL expression that is a valid memory address for
9022 an instruction.
9023 The MODE argument is the machine mode for the MEM expression
9024 that wants to use this address.
9025 Allow REG
9026 REG+disp
9027 REG+r0
9028 REG++
9029 --REG
9031 GBR+disp */
9032 static bool
9033 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9035 if (REG_P (x) && REGNO (x) == GBR_REG)
9036 return true;
9038 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9039 return true;
9040 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9041 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9042 return true;
9043 else if (GET_CODE (x) == PLUS)
9045 rtx xop0 = XEXP (x, 0);
9046 rtx xop1 = XEXP (x, 1);
9048 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9049 return gbr_displacement (xop1, mode);
9051 if (GET_MODE_SIZE (mode) <= 8
9052 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9053 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9054 return true;
9056 if (GET_MODE_SIZE (mode) <= 4
9057 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9059 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9060 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9061 return true;
9062 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9063 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9064 return true;
9068 return false;
9071 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9072 isn't protected by a PIC unspec. */
9073 bool
9074 nonpic_symbol_mentioned_p (rtx x)
9076 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9077 || GET_CODE (x) == PC)
9078 return true;
9080 /* We don't want to look into the possible MEM location of a
9081 CONST_DOUBLE, since we're not going to use it, in general. */
9082 if (GET_CODE (x) == CONST_DOUBLE)
9083 return false;
9085 if (GET_CODE (x) == UNSPEC
9086 && (XINT (x, 1) == UNSPEC_PIC
9087 || XINT (x, 1) == UNSPEC_GOT
9088 || XINT (x, 1) == UNSPEC_GOTOFF
9089 || XINT (x, 1) == UNSPEC_GOTPLT
9090 || XINT (x, 1) == UNSPEC_GOTTPOFF
9091 || XINT (x, 1) == UNSPEC_DTPOFF
9092 || XINT (x, 1) == UNSPEC_TPOFF
9093 || XINT (x, 1) == UNSPEC_PLT
9094 || XINT (x, 1) == UNSPEC_PCREL
9095 || XINT (x, 1) == UNSPEC_SYMOFF
9096 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9097 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9098 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9099 return false;
9101 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9102 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9104 if (fmt[i] == 'E')
9106 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9107 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9108 return true;
9110 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9111 return true;
9114 return false;
9117 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9118 @GOTOFF in `reg'. */
9119 rtx
9120 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9122 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9123 return orig;
9125 if (GET_CODE (orig) == LABEL_REF
9126 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9128 if (reg == NULL_RTX)
9129 reg = gen_reg_rtx (Pmode);
9131 if (TARGET_FDPIC
9132 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9134 /* Weak functions may be NULL which doesn't work with
9135 GOTOFFFUNCDESC because the runtime offset is not known. */
9136 if (SYMBOL_REF_WEAK (orig))
9137 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9138 else
9139 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9141 else if (TARGET_FDPIC
9142 && (GET_CODE (orig) == LABEL_REF
9143 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9144 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9145 || SYMBOL_REF_EXTERNAL_P (orig)
9146 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
9147 /* In FDPIC, GOTOFF can only be used for writable data. */
9148 emit_insn (gen_symGOT2reg (reg, orig));
9149 else
9150 emit_insn (gen_symGOTOFF2reg (reg, orig));
9151 return reg;
9153 else if (GET_CODE (orig) == SYMBOL_REF)
9155 if (reg == NULL_RTX)
9156 reg = gen_reg_rtx (Pmode);
9158 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9159 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9160 else
9161 emit_insn (gen_symGOT2reg (reg, orig));
9162 return reg;
9164 return orig;
9167 /* Given a (logical) mode size and an offset in bytes, try to find the
9168 appropriate displacement value for a mov insn. On SH the displacements
9169 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9170 15 bytes in QImode. To compensate for this we create a new base address by
9171 adding an adjustment value to it.
9173 If the originally requested offset is greater than 127 we prefer using
9174 values 124..127 over 128..131 to increase opportunities to use the
9175 add #imm, Rn insn.
9177 In some cases it is possible that a requested offset might seem unaligned
9178 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9179 This is compensated by adjusting the base address so that the effective
9180 address of the displacement move insn will be aligned.
9182 This is not the best possible way of rebasing the base address, as it
9183 does not look at other present displacement addressings around it.
9184 In some cases this can create more base address adjustments than would
9185 actually be necessary. */
9186 struct disp_adjust
9188 rtx offset_adjust;
9189 rtx mov_disp;
9192 static struct disp_adjust
9193 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9195 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9197 /* Do not try to use SH2A's large displacements here, because this would
9198 effectively disable the small displacement insns. */
9199 const int mode_sz = GET_MODE_SIZE (mode);
9200 const int mov_insn_sz = mov_insn_size (mode, false);
9201 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9202 const int max_disp_next = max_disp + mov_insn_sz;
9203 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9204 HOST_WIDE_INT offset_adjust;
9206 /* In some cases this actually does happen and we must check for it. */
9207 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9208 return res;
9210 /* Keeps the previous behavior for QImode displacement addressing.
9211 This just decides how the offset is re-based. Removing this special
9212 case will result in slightly bigger code on average, but it's not that
9213 bad actually. */
9214 if (mov_insn_sz == 1)
9215 align_modifier = 0;
9217 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9219 if (mode_sz + offset - offset_adjust <= max_disp_next)
9221 res.offset_adjust = GEN_INT (offset_adjust);
9222 res.mov_disp = GEN_INT (offset - offset_adjust);
9225 return res;
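/* Worked example (sketch): an SImode access at offset 132 from the base
   register has max_disp = 60 and align_modifier = 4 (offset > 127), so
   offset_adjust = ((132 + 4) & ~60) - 4 = 124 and mov_disp = 8.  The base
   is rebased with an "add #124,Rn" (124 still fits the signed 8-bit
   immediate) and the move then uses displacement 8.  */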
9228 /* Try to modify an illegitimate address and make it legitimate.
9229 If we find one, return the new, valid address.
9230 Otherwise, return the original address. */
9231 static rtx
9232 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9234 if (flag_pic)
9235 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9237 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9238 || (TARGET_SH2E && mode == SFmode))
9239 return x;
9241 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9242 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9244 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9245 INTVAL (XEXP (x, 1)));
9247 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9249 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9250 adj.offset_adjust, NULL_RTX, 0,
9251 OPTAB_LIB_WIDEN);
9252 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9255 return x;
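/* A sketch of what the legitimization above produces (register numbers are
   made up): an SImode access to

     (plus:SI (reg:SI r4) (const_int 128))

   is rewritten by emitting "tmp = r4 + 124" into a fresh pseudo and
   returning (plus:SI (reg:SI tmp) (const_int 4)), which fits the scaled
   displacement field of the SImode move insn.  */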
9258 /* Attempt to replace *p, which is an address that needs reloading, with
9259 a valid memory address for an operand of mode MODE.
9260 Like for sh_legitimize_address, for the SH we try to get a normal form
9261 of the address. That will allow inheritance of the address reloads. */
9262 bool
9263 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9264 int itype)
9266 enum reload_type type = (enum reload_type) itype;
9267 const int mode_sz = GET_MODE_SIZE (mode);
9269 if (sh_lra_p ())
9270 return false;
9272 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9273 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9275 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9276 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9278 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9280 push_reload (*p, NULL_RTX, p, NULL,
9281 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9282 return true;
9285 if (TARGET_SH2E && mode == SFmode)
9287 *p = copy_rtx (*p);
9288 push_reload (*p, NULL_RTX, p, NULL,
9289 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9290 return true;
9293 /* FIXME: Do not allow legitimizing QImode and HImode displacement
9294 moves because then reload has a problem figuring out the constraint
9295 that the move insn target/source reg must be R0.
9296 Or maybe some handling is wrong in sh_secondary_reload for this
9297 to work properly? */
9298 if ((mode_sz == 4 || mode_sz == 8)
9299 && ! (TARGET_SH4 && mode == DFmode)
9300 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9302 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9303 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9304 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9305 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9306 return true;
9310 /* We must re-recognize what we created before. */
9311 if (GET_CODE (*p) == PLUS
9312 && (mode_sz == 4 || mode_sz == 8)
9313 && GET_CODE (XEXP (*p, 0)) == PLUS
9314 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9315 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9316 && CONST_INT_P (XEXP (*p, 1))
9317 && ! (TARGET_SH2E && mode == SFmode))
9319 /* Because this address is so complex, we know it must have
9320 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9321 it is already unshared, and needs no further unsharing. */
9322 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9323 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9324 return true;
9327 return false;
9330 /* In the name of slightly smaller debug output, and to cater to
9331 general assembler lossage, recognize various UNSPEC sequences
9332 and turn them back into a direct symbol reference. */
9333 static rtx
9334 sh_delegitimize_address (rtx orig_x)
9336 orig_x = delegitimize_mem_from_attrs (orig_x);
9338 rtx x = orig_x;
9339 if (MEM_P (x))
9340 x = XEXP (x, 0);
9341 if (GET_CODE (x) == CONST)
9343 rtx y = XEXP (x, 0);
9344 if (GET_CODE (y) == UNSPEC)
9346 if (XINT (y, 1) == UNSPEC_GOT
9347 || XINT (y, 1) == UNSPEC_GOTOFF
9348 || XINT (y, 1) == UNSPEC_SYMOFF)
9349 return XVECEXP (y, 0, 0);
9350 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9352 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9354 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9356 if (GET_CODE (symplt) == UNSPEC
9357 && (XINT (symplt, 1) == UNSPEC_PLT
9358 || XINT (symplt, 1) == UNSPEC_PCREL))
9359 return XVECEXP (symplt, 0, 0);
9365 return orig_x;
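/* For instance, the delegitimization above turns a GOT reference such as

     (const:SI (unspec:SI [(symbol_ref:SI ("foo"))] UNSPEC_GOT))

   back into the plain (symbol_ref:SI ("foo")) for debug output; "foo" is
   just a placeholder name.  */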
9368 /* Mark the use of a constant in the literal table. If the constant
9369 has multiple labels, make it unique. */
9370 static rtx
9371 mark_constant_pool_use (rtx x)
9373 if (x == NULL_RTX)
9374 return x;
9376 switch (GET_CODE (x))
9378 case LABEL_REF:
9379 x = XEXP (x, 0);
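/* Fall through.  */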
9380 case CODE_LABEL:
9381 break;
9382 default:
9383 return x;
9386 /* Get the first label in the list of labels for the same constant
9387 and delete the other labels in the list. */
9388 rtx_insn* lab = as_a <rtx_insn*> (x);
9389 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9391 if (!LABEL_P (insn)
9392 || LABEL_REFS (insn) != NEXT_INSN (insn))
9393 break;
9394 lab = insn;
9397 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9398 as_a<rtx_insn *> (insn)->set_deleted ();
9400 /* Mark constants in a window. */
9401 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9402 insn = NEXT_INSN (insn))
9404 if (!NONJUMP_INSN_P (insn))
9405 continue;
9407 rtx pattern = PATTERN (insn);
9408 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9409 continue;
9411 switch (XINT (pattern, 1))
9413 case UNSPECV_CONST2:
9414 case UNSPECV_CONST4:
9415 case UNSPECV_CONST8:
9416 XVECEXP (pattern, 0, 1) = const1_rtx;
9417 break;
9418 case UNSPECV_WINDOW_END:
9419 if (XVECEXP (pattern, 0, 0) == x)
9420 return lab;
9421 break;
9422 case UNSPECV_CONST_END:
9423 return lab;
9424 default:
9425 break;
9429 return lab;
9432 /* Return true if it's possible to redirect BRANCH1 to the destination
9433 of an unconditional jump BRANCH2. We only want to do this if the
9434 resulting branch will have a short displacement. */
9435 static bool
9436 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9438 /* Don't follow if BRANCH2 may be a jump crossing between
9439 hot and cold partitions. */
9440 if (flag_reorder_blocks_and_partition
9441 && simplejump_p (branch2)
9442 && CROSSING_JUMP_P (branch2))
9443 return false;
9445 if (flag_expensive_optimizations && simplejump_p (branch2))
9447 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9448 rtx_insn *insn;
9449 int distance;
9451 for (distance = 0, insn = NEXT_INSN (branch1);
9452 insn && distance < 256;
9453 insn = PREV_INSN (insn))
9455 if (insn == dest)
9456 return true;
9457 else
9458 distance += get_attr_length (insn);
9460 for (distance = 0, insn = NEXT_INSN (branch1);
9461 insn && distance < 256;
9462 insn = NEXT_INSN (insn))
9464 if (insn == dest)
9465 return true;
9466 else
9467 distance += get_attr_length (insn);
9470 return false;
9473 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9474 bool
9475 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9476 unsigned int new_reg)
9478 /* Interrupt functions can only use registers that have already been
9479 saved by the prologue, even if they would normally be
9480 call-clobbered. */
9481 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9482 return false;
9484 return true;
9487 /* Function to update the integer COST
9488 based on the relationship between INSN that is dependent on
9489 DEP_INSN through the dependence LINK. The default is to make no
9490 adjustment to COST. This can be used for example to specify to
9491 the scheduler that an output- or anti-dependence does not incur
9492 the same cost as a data-dependence. The return value should be
9493 the new value for COST. */
9494 static int
9495 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9496 unsigned int)
9498 rtx reg, use_pat;
9500 if (dep_type == 0)
9502 if (recog_memoized (insn) < 0
9503 || recog_memoized (dep_insn) < 0)
9504 return cost;
9506 rtx dep_set = single_set (dep_insn);
9508 /* The latency that we specify in the scheduling description refers
9509 to the actual output, not to an auto-increment register; for that,
9510 the latency is one. */
9511 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9513 rtx set = single_set (insn);
9515 if (set
9516 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9517 && (!MEM_P (SET_DEST (set))
9518 || !reg_mentioned_p (SET_DEST (dep_set),
9519 XEXP (SET_DEST (set), 0))))
9520 cost = 1;
9522 /* The only input for a call that is timing-critical is the
9523 function's address. */
9524 if (CALL_P (insn))
9526 rtx call = get_call_rtx_from (insn);
9527 if (call
9528 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9529 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9530 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9531 cost -= TARGET_SH4_300 ? 3 : 6;
9533 /* Likewise, the most timing-critical input for an sfunc call
9534 is the function address. However, sfuncs typically start
9535 using their arguments pretty quickly.
9536 Assume a four cycle delay for SH4 before they are needed.
9537 Cached ST40-300 calls are quicker, so assume only a one
9538 cycle delay there.
9539 ??? Maybe we should encode the delays till input registers
9540 are needed by sfuncs into the sfunc call insn. */
9541 /* All sfunc calls are parallels with at least four components.
9542 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9543 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9544 && XVECLEN (PATTERN (insn), 0) >= 4
9545 && (reg = sfunc_uses_reg (insn)))
9547 if (! reg_set_p (reg, dep_insn))
9548 cost -= TARGET_SH4_300 ? 1 : 4;
9550 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9552 attr_type dep_type = get_attr_type (dep_insn);
9553 attr_type type;
9554 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9555 cost--;
9556 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9557 && (type = get_attr_type (insn)) != TYPE_CALL
9558 && type != TYPE_SFUNC)
9559 cost--;
9560 /* When the preceding instruction loads the shift amount of
9561 the following SHAD/SHLD, the latency of the load is increased
9562 by 1 cycle. */
9563 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9564 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9565 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9566 XEXP (SET_SRC (single_set (insn)),
9567 1)))
9568 cost++;
9569 /* When an LS group instruction with a latency of less than
9570 3 cycles is followed by a double-precision floating-point
9571 instruction, FIPR, or FTRV, the latency of the first
9572 instruction is increased to 3 cycles. */
9573 else if (cost < 3
9574 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9575 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9576 cost = 3;
9577 /* The lsw register of a double-precision computation is ready one
9578 cycle earlier. */
9579 else if (reload_completed
9580 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9581 && (use_pat = single_set (insn))
9582 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9583 SET_SRC (use_pat)))
9584 cost -= 1;
9586 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9587 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9588 cost -= 1;
9590 else if (TARGET_SH4_300)
9592 /* Stores need their input register two cycles later. */
9593 attr_type type;
9594 if (dep_set && cost >= 1
9595 && ((type = get_attr_type (insn)) == TYPE_STORE
9596 || type == TYPE_PSTORE
9597 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9599 rtx set = single_set (insn);
9601 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9602 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9604 cost -= 2;
9605 /* But don't reduce the cost below 1 if the address depends
9606 on a side effect of dep_insn. */
9607 if (cost < 1
9608 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9609 cost = 1;
9614 /* An anti-dependence penalty of two applies if the first insn is a double
9615 precision fadd / fsub / fmul. */
9616 else if (!TARGET_SH4_300
9617 && dep_type == REG_DEP_ANTI
9618 && recog_memoized (dep_insn) >= 0
9619 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9620 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9621 /* A lot of alleged anti-flow dependences are fake,
9622 so check this one is real. */
9623 && flow_dependent_p (dep_insn, insn))
9624 cost = 2;
9626 return cost;
9629 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9630 if DEP_INSN is anti-flow dependent on INSN. */
9631 static bool
9632 flow_dependent_p (rtx insn, rtx dep_insn)
9634 rtx tmp = PATTERN (insn);
9636 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9637 return tmp == NULL_RTX;
9640 /* A helper function for flow_dependent_p called through note_stores. */
9641 static void
9642 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9644 rtx * pinsn = (rtx *) data;
9646 if (*pinsn && reg_referenced_p (x, *pinsn))
9647 *pinsn = NULL_RTX;
9650 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9651 'special function' patterns (type sfunc) that clobber pr, but that
9652 do not look like function calls to leaf_function_p. Hence we must
9653 do this extra check. */
9654 static int
9655 sh_pr_n_sets (void)
9657 return DF_REG_DEF_COUNT (PR_REG);
9660 /* Return where to allocate pseudo for a given hard register initial
9661 value. */
9662 static rtx
9663 sh_allocate_initial_value (rtx hard_reg)
9665 if (REGNO (hard_reg) == PR_REG)
9667 if (crtl->is_leaf && ! sh_pr_n_sets ())
9668 return hard_reg;
9669 else
9670 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9673 return NULL_RTX;
9676 /* This function returns "2" to indicate dual issue for the SH4
9677 processor. To be used by the DFA pipeline description. */
9678 static int
9679 sh_issue_rate (void)
9681 if (TARGET_SUPERSCALAR)
9682 return 2;
9683 else
9684 return 1;
9687 /* Functions for ready queue reordering for sched1. */
9689 /* Get weight for mode for a set x. */
9690 static short
9691 find_set_regmode_weight (rtx x, machine_mode mode)
9693 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9694 return 1;
9695 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9697 if (REG_P (SET_DEST (x)))
9699 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9700 return 1;
9701 else
9702 return 0;
9704 return 1;
9706 return 0;
9709 /* Get regmode weight for insn. */
9710 static short
9711 find_insn_regmode_weight (rtx insn, machine_mode mode)
9713 /* Increment weight for each register born here. */
9714 rtx x = PATTERN (insn);
9715 short reg_weight = find_set_regmode_weight (x, mode);
9716 if (GET_CODE (x) == PARALLEL)
9718 int j;
9719 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9721 x = XVECEXP (PATTERN (insn), 0, j);
9722 reg_weight += find_set_regmode_weight (x, mode);
9725 /* Decrement weight for each register that dies here. */
9726 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9728 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9730 rtx note = XEXP (x, 0);
9731 if (REG_P (note) && GET_MODE (note) == mode)
9732 reg_weight--;
9735 return reg_weight;
9738 /* Calculate regmode weights for all insns of a basic block. */
9739 static void
9740 find_regmode_weight (basic_block b, machine_mode mode)
9742 rtx_insn *insn, *next_tail, *head, *tail;
9744 get_ebb_head_tail (b, b, &head, &tail);
9745 next_tail = NEXT_INSN (tail);
9747 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9749 /* Handle register life information. */
9750 if (!INSN_P (insn))
9751 continue;
9753 if (mode == SFmode)
9754 INSN_REGMODE_WEIGHT (insn, mode) =
9755 find_insn_regmode_weight (insn, mode)
9756 + 2 * find_insn_regmode_weight (insn, DFmode);
9757 else if (mode == SImode)
9758 INSN_REGMODE_WEIGHT (insn, mode) =
9759 find_insn_regmode_weight (insn, mode)
9760 + 2 * find_insn_regmode_weight (insn, DImode);
9764 /* Comparison function for ready queue sorting. */
9765 static int
9766 rank_for_reorder (const void *x, const void *y)
9768 rtx_insn *tmp = *(rtx_insn * const *) y;
9769 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9771 /* The insn in a schedule group should be issued first. */
9772 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9773 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9775 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9776 minimizes instruction movement, thus minimizing sched's effect on
9777 register pressure. */
9778 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9781 /* Resort the array A in which only the element at index N may be out of order. */
9782 static void
9783 swap_reorder (rtx_insn **a, int n)
9785 rtx_insn *insn = a[n - 1];
9786 int i = n - 2;
9788 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9790 a[i + 1] = a[i];
9791 i -= 1;
9793 a[i + 1] = insn;
9796 /* Sort the ready list by ascending priority. */
9797 static void
9798 ready_reorder (rtx_insn **ready, int nready)
9800 if (nready == 2)
9801 swap_reorder (ready, nready);
9802 else if (nready > 2)
9803 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9806 /* Count life regions of r0 for a block. */
9807 static int
9808 find_r0_life_regions (basic_block b)
9810 bool live;
9811 int set;
9812 int death = 0;
9814 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9816 set = 1;
9817 live = true;
9819 else
9821 set = 0;
9822 live = false;
9825 rtx_insn* insn = BB_HEAD (b);
9826 rtx_insn* end = BB_END (b);
9827 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9828 while (1)
9830 if (INSN_P (insn))
9832 if (find_regno_note (insn, REG_DEAD, R0_REG))
9834 death++;
9835 live = false;
9838 rtx pset;
9839 if (!live
9840 && (pset = single_set (insn))
9841 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9842 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9844 set++;
9845 live = true;
9848 if (insn == end)
9849 break;
9850 insn = NEXT_INSN (insn);
9852 return set - death;
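/* Worked example for the counting above (hypothetical block): if r0 is not
   live on entry, is set twice and carries one REG_DEAD note, then set = 2
   and death = 1, so the function returns 1, i.e. one r0 life region is
   still open when the block ends.  */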
9855 /* Calculate regmode weights for all insns of all basic blocks. */
9856 static void
9857 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9858 int verbose ATTRIBUTE_UNUSED,
9859 int old_max_uid)
9861 basic_block b;
9863 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9864 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9865 r0_life_regions = 0;
9867 FOR_EACH_BB_REVERSE_FN (b, cfun)
9869 find_regmode_weight (b, SImode);
9870 find_regmode_weight (b, SFmode);
9871 if (!reload_completed)
9872 r0_life_regions += find_r0_life_regions (b);
9875 CURR_REGMODE_PRESSURE (SImode) = 0;
9876 CURR_REGMODE_PRESSURE (SFmode) = 0;
9879 /* Cleanup. */
9880 static void
9881 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9882 int verbose ATTRIBUTE_UNUSED)
9884 if (regmode_weight[0])
9886 free (regmode_weight[0]);
9887 regmode_weight[0] = NULL;
9889 if (regmode_weight[1])
9891 free (regmode_weight[1]);
9892 regmode_weight[1] = NULL;
9896 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9897 keep count of register pressures on SImode and SFmode. */
9898 static int
9899 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9900 int sched_verbose ATTRIBUTE_UNUSED,
9901 rtx_insn *insn,
9902 int can_issue_more)
9904 if (GET_CODE (PATTERN (insn)) != USE
9905 && GET_CODE (PATTERN (insn)) != CLOBBER)
9906 cached_can_issue_more = can_issue_more - 1;
9907 else
9908 cached_can_issue_more = can_issue_more;
9910 if (reload_completed)
9911 return cached_can_issue_more;
9913 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9914 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9916 return cached_can_issue_more;
9919 static void
9920 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9921 int verbose ATTRIBUTE_UNUSED,
9922 int veclen ATTRIBUTE_UNUSED)
9924 CURR_REGMODE_PRESSURE (SImode) = 0;
9925 CURR_REGMODE_PRESSURE (SFmode) = 0;
9928 /* Some magic numbers. */
9929 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9930 functions that already have high pressure on r0. */
9931 #define R0_MAX_LIFE_REGIONS 2
9932 /* Register Pressure thresholds for SImode and SFmode registers. */
9933 #define SIMODE_MAX_WEIGHT 5
9934 #define SFMODE_MAX_WEIGHT 10
9936 /* Return true if the pressure is high for MODE. */
9937 static bool
9938 high_pressure (machine_mode mode)
9940 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9941 functions that already have high pressure on r0. */
9942 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9943 return true;
9945 if (mode == SFmode)
9946 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9947 else
9948 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9951 /* Reorder ready queue if register pressure is high. */
9952 static int
9953 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9954 int sched_verbose ATTRIBUTE_UNUSED,
9955 rtx_insn **ready,
9956 int *n_readyp,
9957 int clock_var ATTRIBUTE_UNUSED)
9959 if (reload_completed)
9960 return sh_issue_rate ();
9962 if (high_pressure (SFmode) || high_pressure (SImode))
9964 ready_reorder (ready, *n_readyp);
9967 return sh_issue_rate ();
9970 /* Skip cycles if the current register pressure is high. */
9971 static int
9972 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9973 int sched_verbose ATTRIBUTE_UNUSED,
9974 rtx_insn **ready ATTRIBUTE_UNUSED,
9975 int *n_readyp ATTRIBUTE_UNUSED,
9976 int clock_var ATTRIBUTE_UNUSED)
9978 if (reload_completed)
9979 return cached_can_issue_more;
9981 if (high_pressure (SFmode) || high_pressure (SImode))
9982 skip_cycles = 1;
9984 return cached_can_issue_more;
9987 /* Skip cycles without sorting the ready queue. This will move insns from
9988 Q -> R. If this is the last cycle we are skipping, allow sorting of the
9989 ready queue by sh_reorder. */
9991 /* Generally, skipping this many cycles is sufficient for all insns to move
9992 from Q -> R. */
9993 #define MAX_SKIPS 8
9995 static int
9996 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9997 int sched_verbose ATTRIBUTE_UNUSED,
9998 rtx_insn *insn ATTRIBUTE_UNUSED,
9999 int last_clock_var,
10000 int clock_var,
10001 int *sort_p)
10003 if (reload_completed)
10004 return 0;
10006 if (skip_cycles)
10008 if ((clock_var - last_clock_var) < MAX_SKIPS)
10010 *sort_p = 0;
10011 return 1;
10013 /* If this is the last cycle we are skipping, allow reordering of R. */
10014 if ((clock_var - last_clock_var) == MAX_SKIPS)
10016 *sort_p = 1;
10017 return 1;
10021 skip_cycles = 0;
10023 return 0;
10026 static bool
10027 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10029 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10033 On the SH1..SH4, the trampoline looks like
10034 2 0002 D202 mov.l l2,r2
10035 1 0000 D301 mov.l l1,r3
10036 3 0004 422B jmp @r2
10037 4 0006 0009 nop
10038 5 0008 00000000 l1: .long area
10039 6 000c 00000000 l2: .long function
10041 FDPIC needs a form that includes a function descriptor and
10042 code to load the GOT register:
10043 0 0000 00000000 .long l0
10044 1 0004 00000000 .long gotval
10045 2 0008 D302 l0: mov.l l1,r3
10046 3 000a D203 mov.l l2,r2
10047 4 000c 6122 mov.l @r2,r1
10048 5 000e 5C21 mov.l @(4,r2),r12
10049 6 0010 412B jmp @r1
10050 7 0012 0009 nop
10051 8 0014 00000000 l1: .long area
10052 9 0018 00000000 l2: .long function
10054 SH5 (compact) uses r1 instead of r3 for the static chain. */
10056 /* Emit insns to store a value at memory address + offset. */
10057 static void
10058 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10060 gcc_assert ((offset & 3) == 0);
10061 emit_move_insn (offset == 0
10062 ? change_address (addr, SImode, NULL_RTX)
10063 : adjust_address (addr, SImode, offset), value);
10066 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10067 static void
10068 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10070 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10071 ? (w0 | (w1 << 16))
10072 : (w1 | (w0 << 16)), SImode));
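/* Example of the packing above, using opcodes from the non-FDPIC
   trampoline below: sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301) stores
   the SImode constant 0xd301d202 on a little-endian target and 0xd202d301
   on a big-endian one, so the two 16-bit opcodes end up in the intended
   order in memory either way.  */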
10075 /* Emit RTL insns to initialize the variable parts of a trampoline.
10076 FNADDR is an RTX for the address of the function's pure code.
10077 CXT is an RTX for the static chain value for the function. */
10078 static void
10079 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10081 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10082 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10084 if (TARGET_FDPIC)
10086 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10088 sh_emit_storesi (tramp_mem, 0, a);
10089 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10091 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10092 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10093 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10095 sh_emit_storesi (tramp_mem, 20, cxt);
10096 sh_emit_storesi (tramp_mem, 24, fnaddr);
10098 else
10100 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10101 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10103 sh_emit_storesi (tramp_mem, 8, cxt);
10104 sh_emit_storesi (tramp_mem, 12, fnaddr);
10106 if (TARGET_HARD_SH4)
10108 if (!TARGET_INLINE_IC_INVALIDATE
10109 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10110 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10111 FUNCTION_ORDINARY).sym,
10112 LCT_NORMAL, VOIDmode, tramp, SImode);
10113 else
10114 emit_insn (gen_ic_invalidate_line (tramp));
10118 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10119 static rtx
10120 sh_trampoline_adjust_address (rtx tramp)
10122 return tramp;
10125 /* If PIC, we cannot make sibling calls to global functions
10126 because the PLT requires r12 to be live. */
10127 static bool
10128 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10130 return (1
10131 && ! sh_cfun_interrupt_handler_p ()
10132 && (! flag_pic || TARGET_FDPIC
10133 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10134 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10137 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10138 void
10139 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10141 const_tree decl = SYMBOL_REF_DECL (sym);
10142 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10144 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10145 emit_insn (gen_sym_label2reg (reg, sym, lab));
10146 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10147 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10148 else
10149 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10152 /* Machine specific built-in functions. */
10154 struct builtin_description
10156 bool (* const is_enabled) (void);
10157 const enum insn_code icode;
10158 const char *const name;
10159 int signature;
10160 tree fndecl;
10163 /* This function can be used if there are any built-ins that are not for
10164 SHmedia. It's commented out to avoid the defined-but-unused warning. */
10165 static bool
10166 sh1_builtin_p (void)
10168 return TARGET_SH1;
10171 /* Describe number and signedness of arguments; arg[0] == result
10172 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
10173 /* 9: 64-bit pointer, 10: 32-bit pointer */
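/* For example, the two signatures actually used in bdesc below are
   SH_BLTIN_UV = { 1, 0 } (unsigned result, no arguments, as for
   __builtin_sh_get_fpscr) and SH_BLTIN_VU = { 0, 1 } (no result, one
   unsigned argument, as for __builtin_sh_set_fpscr).  */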
10174 static const char signature_args[][4] =
10176 #define SH_BLTIN_V2SI2 0
10177 { 4, 4 },
10178 #define SH_BLTIN_V4HI2 1
10179 { 4, 4 },
10180 #define SH_BLTIN_V2SI3 2
10181 { 4, 4, 4 },
10182 #define SH_BLTIN_V4HI3 3
10183 { 4, 4, 4 },
10184 #define SH_BLTIN_V8QI3 4
10185 { 4, 4, 4 },
10186 #define SH_BLTIN_MAC_HISI 5
10187 { 1, 4, 4, 1 },
10188 #define SH_BLTIN_SH_HI 6
10189 { 4, 4, 1 },
10190 #define SH_BLTIN_SH_SI 7
10191 { 4, 4, 1 },
10192 #define SH_BLTIN_V4HI2V2SI 8
10193 { 4, 4, 4 },
10194 #define SH_BLTIN_V4HI2V8QI 9
10195 { 4, 4, 4 },
10196 #define SH_BLTIN_SISF 10
10197 { 4, 2 },
10198 #define SH_BLTIN_LDUA_L 11
10199 { 2, 10 },
10200 #define SH_BLTIN_LDUA_Q 12
10201 { 1, 10 },
10202 #define SH_BLTIN_STUA_L 13
10203 { 0, 10, 2 },
10204 #define SH_BLTIN_STUA_Q 14
10205 { 0, 10, 1 },
10206 #define SH_BLTIN_LDUA_L64 15
10207 { 2, 9 },
10208 #define SH_BLTIN_LDUA_Q64 16
10209 { 1, 9 },
10210 #define SH_BLTIN_STUA_L64 17
10211 { 0, 9, 2 },
10212 #define SH_BLTIN_STUA_Q64 18
10213 { 0, 9, 1 },
10214 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10215 #define SH_BLTIN_2 19
10216 #define SH_BLTIN_SU 19
10217 { 1, 2 },
10218 #define SH_BLTIN_3 20
10219 #define SH_BLTIN_SUS 20
10220 { 2, 2, 1 },
10221 #define SH_BLTIN_PSSV 21
10222 { 0, 8, 2, 2 },
10223 #define SH_BLTIN_XXUU 22
10224 #define SH_BLTIN_UUUU 22
10225 { 1, 1, 1, 1 },
10226 #define SH_BLTIN_PV 23
10227 { 0, 8 },
10228 #define SH_BLTIN_VP 24
10229 { 8, 0 },
10230 #define SH_BLTIN_UV 25
10231 { 1, 0 },
10232 #define SH_BLTIN_VU 26
10233 { 0, 1 },
10235 /* mcmv: operands considered unsigned. */
10236 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10237 /* mperm: control value considered unsigned int. */
10238 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10239 /* mshards_q: returns signed short. */
10240 /* nsb: takes long long arg, returns unsigned char. */
10241 static struct builtin_description bdesc[] =
10243 { sh1_builtin_p,
10244 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10245 { sh1_builtin_p,
10246 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
10249 static tree sh_builtin_get_fpscr;
10250 static tree sh_builtin_set_fpscr;
10252 static void
10253 sh_init_builtins (void)
10255 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10256 memset (shared, 0, sizeof shared);
10258 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10260 builtin_description* d = &bdesc[di];
10262 if (!d->is_enabled ())
10263 continue;
10265 tree type, arg_type = NULL_TREE;
10266 int signature = d->signature;
10268 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10269 type = shared[signature];
10270 else
10272 int has_result = signature_args[signature][0] != 0;
10273 tree args[3];
10275 if (! TARGET_FPU_ANY
10276 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10277 continue;
10278 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10279 args[i] = NULL_TREE;
10280 for (int i = 3; ; i--)
10282 int arg = signature_args[signature][i];
10283 int opno = i - 1 + has_result;
10285 if (arg & 8)
10286 arg_type = ptr_type_node;
10287 else if (arg)
10288 arg_type = (*lang_hooks.types.type_for_mode)
10289 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10290 else if (i)
10291 continue;
10292 else
10293 arg_type = void_type_node;
10294 if (i == 0)
10295 break;
10296 args[i-1] = arg_type;
10298 type = build_function_type_list (arg_type, args[0], args[1],
10299 args[2], NULL_TREE);
10300 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10301 shared[signature] = type;
10303 d->fndecl =
10304 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10305 NULL, NULL_TREE);
10306 /* Recode {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10307 if (d->icode == CODE_FOR_sts_fpscr)
10308 sh_builtin_get_fpscr = d->fndecl;
10309 else if (d->icode == CODE_FOR_set_fpscr)
10310 sh_builtin_set_fpscr = d->fndecl;
10314 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
10316 static void
10317 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
10319 const unsigned SH_FE_INVALID = 64;
10320 const unsigned SH_FE_DIVBYZERO = 32;
10321 const unsigned SH_FE_OVERFLOW = 16;
10322 const unsigned SH_FE_UNDERFLOW = 8;
10323 const unsigned SH_FE_INEXACT = 4;
10324 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10325 | SH_FE_DIVBYZERO
10326 | SH_FE_OVERFLOW
10327 | SH_FE_UNDERFLOW
10328 | SH_FE_INEXACT);
10329 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
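/* Spelling out the arithmetic: SH_FE_ALL_EXCEPT is 0x7c, shifting it left
   by SH_FE_EXCEPT_SHIFT gives 0xf80, so the mask built below is
   ~(0xf80 | 0x7c) = ~0xffc.  The bit positions are assumed to mirror the
   FPSCR flag and enable fields.  */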
10330 tree fenv_var, mask, ld_fenv, masked_fenv;
10331 tree new_fenv_var, reload_fenv, restore_fnenv;
10332 tree update_call, atomic_feraiseexcept, hold_fnclex;
10334 if (! TARGET_FPU_ANY)
10335 return;
10337 /* Generate the equivalent of :
10338 unsigned int fenv_var;
10339 fenv_var = __builtin_sh_get_fpscr ();
10341 unsigned int masked_fenv;
10342 masked_fenv = fenv_var & mask;
10344 __builtin_sh_set_fpscr (masked_fenv); */
10346 fenv_var = create_tmp_var_raw (unsigned_type_node);
10347 mask = build_int_cst (unsigned_type_node,
10348 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10349 | SH_FE_ALL_EXCEPT));
10350 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10351 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10352 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10353 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10354 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10355 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10356 ld_fenv),
10357 NULL_TREE, NULL_TREE);
10358 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10360 /* Store the value of masked_fenv to clear the exceptions:
10361 __builtin_sh_set_fpscr (masked_fenv); */
10363 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10365 /* Generate the equivalent of :
10366 unsigned int new_fenv_var;
10367 new_fenv_var = __builtin_sh_get_fpscr ();
10369 __builtin_sh_set_fpscr (fenv_var);
10371 __atomic_feraiseexcept (new_fenv_var); */
10373 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10374 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10375 build_call_expr (sh_builtin_get_fpscr, 0));
10376 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10377 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10378 update_call = build_call_expr (atomic_feraiseexcept, 1,
10379 fold_convert (integer_type_node,
10380 new_fenv_var));
10381 *update = build2 (COMPOUND_EXPR, void_type_node,
10382 build2 (COMPOUND_EXPR, void_type_node,
10383 reload_fenv, restore_fnenv), update_call);
10386 /* Implements target hook vector_mode_supported_p. */
10387 bool
10388 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10390 return false;
10393 bool
10394 sh_frame_pointer_required (void)
10396 /* If needed override this in other tm.h files to cope with various OS
10397 lossage requiring a frame pointer. */
10398 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10399 return true;
10401 if (crtl->profile)
10402 return true;
10404 return false;
10407 /* Implements target hook dwarf_calling_convention. Return an enum
10408 of dwarf_calling_convention. */
10410 sh_dwarf_calling_convention (const_tree func)
10412 if (sh_attr_renesas_p (func))
10413 return DW_CC_GNU_renesas_sh;
10415 return DW_CC_normal;
10418 /* Returns the sh builtin decl for CODE. */
10419 static tree
10420 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10422 if (code >= ARRAY_SIZE (bdesc))
10423 return error_mark_node;
10425 if (!bdesc[code].is_enabled ())
10426 return error_mark_node;
10428 return bdesc[code].fndecl;
10431 /* Expand an expression EXP that calls a built-in function,
10432 with result going to TARGET if that's convenient
10433 (and in mode MODE if that's convenient).
10434 SUBTARGET may be used as the target for computing one of EXP's operands.
10435 IGNORE is nonzero if the value is to be ignored. */
10436 static rtx
10437 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10438 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10440 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10441 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10442 const struct builtin_description *d = &bdesc[fcode];
10443 enum insn_code icode = d->icode;
10444 int signature = d->signature;
10445 int nop = 0;
10446 rtx op[4];
10448 if (signature_args[signature][0])
10450 if (ignore)
10451 return NULL_RTX;
10453 machine_mode tmode = insn_data[icode].operand[0].mode;
10454 if (! target || GET_MODE (target) != tmode
10455 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10456 target = gen_reg_rtx (tmode);
10457 op[nop++] = target;
10459 else
10460 target = NULL_RTX;
10462 for (int i = 1; i <= 3; i++, nop++)
10464 if (! signature_args[signature][i])
10465 break;
10466 tree arg = CALL_EXPR_ARG (exp, i - 1);
10467 if (arg == error_mark_node)
10468 return const0_rtx;
10470 machine_mode opmode;
10471 tree optype;
10472 if (signature_args[signature][i] & 8)
10474 opmode = ptr_mode;
10475 optype = ptr_type_node;
10477 else
10479 opmode = insn_data[icode].operand[nop].mode;
10480 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10483 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10484 if (argmode != opmode)
10485 arg = build1 (NOP_EXPR, optype, arg);
10486 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10487 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10488 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10491 rtx pat = NULL_RTX;
10493 switch (nop)
10495 case 1:
10496 pat = (*insn_data[d->icode].genfun) (op[0]);
10497 break;
10498 case 2:
10499 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10500 break;
10501 case 3:
10502 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10503 break;
10504 case 4:
10505 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10506 break;
10507 default:
10508 gcc_unreachable ();
10510 if (! pat)
10511 return NULL_RTX;
10512 emit_insn (pat);
10513 return target;
10516 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10517 UNITS_PER_WORD bytes wide. */
10519 static unsigned int
10520 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10522 if (XD_REGISTER_P (regno))
10523 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10524 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
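/* For instance, with UNITS_PER_WORD == 4 a DFmode value occupies
   CEIL (8, 8) = 1 XD register but CEIL (8, 4) = 2 general or FP registers;
   the numbers only illustrate the formula above.  */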
10527 /* Implement TARGET_HARD_REGNO_MODE_OK.
10529 We can allow any mode in any general register. The special registers
10530 only allow SImode. Don't allow any mode in the PR.
10532 We cannot hold DCmode values in the XD registers because alter_reg
10533 handles subregs of them incorrectly. We could work around this by
10534 spacing the XD registers like the DR registers, but this would require
10535 additional memory in every compilation to hold larger register vectors.
10536 We could hold SFmode / SCmode values in XD registers, but that
10537 would require a tertiary reload when reloading from / to memory,
10538 and a secondary reload to reload from / to general regs; that
10539 seems to be a losing proposition.
10541 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10542 it won't be ferried through GP registers first. */
10543 static bool
10544 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10546 if (SPECIAL_REGISTER_P (regno))
10547 return mode == SImode;
10549 if (regno == FPUL_REG)
10550 return (mode == SImode || mode == SFmode);
10552 if (FP_REGISTER_P (regno) && mode == SFmode)
10553 return true;
10555 if (mode == V2SFmode)
10557 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10558 || GENERAL_REGISTER_P (regno)))
10559 return true;
10560 else
10561 return false;
10564 if (mode == V4SFmode)
10566 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10567 || GENERAL_REGISTER_P (regno))
10568 return true;
10569 else
10570 return false;
10573 if (mode == V16SFmode)
10574 return regno == FIRST_XD_REG;
10576 if (FP_REGISTER_P (regno))
10578 if (mode == SFmode
10579 || mode == SImode
10580 || ((TARGET_SH2E) && mode == SCmode)
10581 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10582 && ((regno - FIRST_FP_REG) & 1) == 0)
10583 || (TARGET_SH4 && mode == TImode
10584 && ((regno - FIRST_FP_REG) & 3) == 0))
10585 return true;
10586 else
10587 return false;
10590 if (XD_REGISTER_P (regno))
10591 return mode == DFmode;
10593 if (regno == PR_REG)
10594 return mode == SImode;
10596 if (regno == FPSCR_REG)
10597 return mode == SImode;
10599 return true;
10602 /* Implement TARGET_MODES_TIEABLE_P.
10604 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10605 and MODE2, for any hard reg, then this must be false for correct output.
10606 That's the case for xd registers: we don't hold SFmode values in
10607 them, so we can't tie an SFmode pseudo with one in another
10608 floating-point mode. */
10610 static bool
10611 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10613 return (mode1 == mode2
10614 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10615 && (mode1 != SFmode && mode2 != SFmode)));
10618 /* Specify the modes required to caller save a given hard regno.
10619 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10620 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10621 permits integer modes on them. That makes LRA's split process
10622 unhappy. See PR55212. */
10624 machine_mode
10625 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10626 machine_mode mode)
10628 if (FP_REGISTER_P (regno)
10629 && (mode == SFmode
10630 || mode == SCmode
10631 || ((mode == DFmode || mode == DCmode)
10632 && ((regno - FIRST_FP_REG) & 1) == 0)))
10633 return mode;
10635 return choose_hard_reg_mode (regno, nregs, false);
10638 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10639 static bool
10640 sh_can_change_mode_class (machine_mode from, machine_mode to,
10641 reg_class_t rclass)
10643 /* We want to enable the use of SUBREGs as a means to
10644 VEC_SELECT a single element of a vector. */
10646 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10647 This can be problematic when SFmode vector subregs need to be accessed
10648 on the stack with displacement addressing, as it happens with -O0.
10649 Thus we disallow the mode change for -O0. */
10650 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10651 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10653 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10655 if (TARGET_LITTLE_ENDIAN)
10657 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10658 return !reg_classes_intersect_p (DF_REGS, rclass);
10660 else
10662 if (GET_MODE_SIZE (from) < 8)
10663 return !reg_classes_intersect_p (DF_REGS, rclass);
10666 return true;
10669 /* Return true if registers in machine mode MODE will likely be
10670 allocated to registers in small register classes. */
10671 bool
10672 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10674 return true;
10677 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10678 that label is used. */
10679 void
10680 sh_mark_label (rtx address, int nuses)
10682 if (GOTOFF_P (address))
10684 /* Extract the label or symbol. */
10685 address = XEXP (address, 0);
10686 if (GET_CODE (address) == PLUS)
10687 address = XEXP (address, 0);
10688 address = XVECEXP (address, 0, 0);
10690 if (GET_CODE (address) == LABEL_REF
10691 && LABEL_P (XEXP (address, 0)))
10692 LABEL_NUSES (XEXP (address, 0)) += nuses;
10695 /* Compute extra cost of moving data between one register class
10696 and another.
10698 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10699 uses this information. Hence, the general register <-> floating point
10700 register information here is not used for SFmode. */
10701 static int
10702 sh_register_move_cost (machine_mode mode,
10703 reg_class_t srcclass, reg_class_t dstclass)
10705 if (dstclass == T_REGS || dstclass == PR_REGS)
10706 return 10;
10708 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10709 return 4;
10711 if (mode == SImode && TARGET_FMOVD
10712 && REGCLASS_HAS_FP_REG (srcclass)
10713 && REGCLASS_HAS_FP_REG (dstclass))
10714 return 4;
10716 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10717 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10719 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10720 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10721 return 9;
10723 if ((REGCLASS_HAS_FP_REG (dstclass)
10724 && REGCLASS_HAS_GENERAL_REG (srcclass))
10725 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10726 && REGCLASS_HAS_FP_REG (srcclass)))
10728 /* Discourage trying to use fp regs for a pointer. This also
10729 discourages fp regs with SImode because Pmode is an alias
10730 of SImode on this target. See PR target/48596. */
10731 int addend = (mode == Pmode) ? 40 : 0;
10733 return ((TARGET_FMOVD ? 8 : 12) + addend)
10734 * ((GET_MODE_SIZE (mode) + 7) / 8U);
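/* Rough illustration of the formula above: with TARGET_FMOVD, moving a
   DFmode value between general and FP registers costs (8 + 0) * 1 = 8,
   while an SImode (== Pmode) move gets the pointer penalty and costs
   (8 + 40) * 1 = 48.  */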
10737 if ((dstclass == FPUL_REGS
10738 && REGCLASS_HAS_GENERAL_REG (srcclass))
10739 || (srcclass == FPUL_REGS
10740 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10741 return 5;
10743 if ((dstclass == FPUL_REGS
10744 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10745 || (srcclass == FPUL_REGS
10746 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10747 return 7;
10749 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10750 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10751 return 4;
10753 if (TARGET_FMOVD
10754 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10755 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10756 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10758 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10761 static rtx
10762 emit_load_ptr (rtx reg, rtx addr)
10764 rtx mem = gen_const_mem (ptr_mode, addr);
10766 if (Pmode != ptr_mode)
10767 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10768 return emit_move_insn (reg, mem);
10771 static void
10772 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10773 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10774 tree function)
10776 CUMULATIVE_ARGS cum;
10777 int structure_value_byref = 0;
10778 rtx this_rtx, this_value, sibcall, funexp;
10779 rtx_insn *insns;
10780 tree funtype = TREE_TYPE (function);
10781 int simple_add = CONST_OK_FOR_ADD (delta);
10782 int did_load = 0;
10783 rtx scratch0, scratch1, scratch2;
10785 reload_completed = 1;
10786 epilogue_completed = 1;
10787 crtl->uses_only_leaf_regs = 1;
10789 emit_note (NOTE_INSN_PROLOGUE_END);
10791 /* Find the "this" pointer. We have such a wide range of ABIs for the
10792 SH that it's best to do this completely machine independently.
10793 "this" is passed as first argument, unless a structure return pointer
10794 comes first, in which case "this" comes second. */
10795 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10796 #ifndef PCC_STATIC_STRUCT_RETURN
10797 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10798 structure_value_byref = 1;
10799 #endif /* not PCC_STATIC_STRUCT_RETURN */
10800 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10802 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10804 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
10806 this_rtx
10807 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
10809 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10810 static chain pointer (even if you can't have nested virtual functions
10811 right now, someone might implement them sometime), and the rest of the
10812 registers are used for argument passing, are callee-saved, or reserved. */
10813 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10814 -ffixed-reg has been used. */
10815 if (! call_used_regs[0] || fixed_regs[0])
10816 error ("r0 needs to be available as a call-clobbered register");
10817 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10820 if (call_used_regs[1] && ! fixed_regs[1])
10821 scratch1 = gen_rtx_REG (ptr_mode, 1);
10822 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10823 to the location where struct values are returned. */
10824 if (call_used_regs[3] && ! fixed_regs[3])
10825 scratch2 = gen_rtx_REG (Pmode, 3);
10828 this_value = plus_constant (Pmode, this_rtx, delta);
10829 if (vcall_offset
10830 && (simple_add || scratch0 != scratch1)
10831 && strict_memory_address_p (ptr_mode, this_value))
10833 emit_load_ptr (scratch0, this_value);
10834 did_load = 1;
10837 if (!delta)
10838 ; /* Do nothing. */
10839 else if (simple_add)
10840 emit_move_insn (this_rtx, this_value);
10841 else
10843 emit_move_insn (scratch1, GEN_INT (delta));
10844 emit_insn (gen_add2_insn (this_rtx, scratch1));
10847 if (vcall_offset)
10849 rtx offset_addr;
10851 if (!did_load)
10852 emit_load_ptr (scratch0, this_rtx);
10854 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10855 if (strict_memory_address_p (ptr_mode, offset_addr))
10856 ; /* Do nothing. */
10857 else if (scratch0 != scratch1)
10859 /* scratch0 != scratch1, and we have indexed loads. Get a better
10860 schedule by loading the offset into r1 and using an indexed
10861 load - then the load of r1 can issue before the load from
10862 (this_rtx + delta) finishes. */
10863 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10864 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10866 else if (CONST_OK_FOR_ADD (vcall_offset))
10868 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10869 offset_addr = scratch0;
10871 else if (scratch0 != scratch1)
10873 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10874 emit_insn (gen_add2_insn (scratch0, scratch1));
10875 offset_addr = scratch0;
10877 else
10878 gcc_unreachable (); /* FIXME */
10879 emit_load_ptr (scratch0, offset_addr);
10881 if (Pmode != ptr_mode)
10882 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10883 emit_insn (gen_add2_insn (this_rtx, scratch0));
10886 /* Generate a tail call to the target function. */
10887 if (! TREE_USED (function))
10889 assemble_external (function);
10890 TREE_USED (function) = 1;
10892 funexp = XEXP (DECL_RTL (function), 0);
10893 /* If the function is overridden, so is the thunk, hence we don't
10894 need GOT addressing even if this is a public symbol. */
10895 #if 0
10896 if (TARGET_SH1 && ! flag_weak)
10897 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10898 else
10899 #endif
10900 if (TARGET_SH2 && flag_pic)
10902 if (TARGET_FDPIC)
10904 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10905 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10907 else
10909 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10910 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10913 else
10915 emit_move_insn (scratch2, funexp);
10916 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10917 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10919 sibcall = emit_call_insn (sibcall);
10920 SIBLING_CALL_P (sibcall) = 1;
10921 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10922 emit_barrier ();
10924 /* Run just enough of rest_of_compilation to do scheduling and get
10925 the insns emitted. Note that use_thunk calls
10926 assemble_start_function and assemble_end_function. */
10928 insns = get_insns ();
10930 if (optimize > 0)
10932 if (! cfun->cfg)
10933 init_flow (cfun);
10934 split_all_insns_noflow ();
10937 sh_reorg ();
10938 shorten_branches (insns);
10939 final_start_function (insns, file, 1);
10940 final (insns, file, 1);
10941 final_end_function ();
10943 reload_completed = 0;
10944 epilogue_completed = 0;
10947 /* Return an RTX pair for the address and call site label of a function
10948 NAME of kind KIND, placing the result in TARGET if not NULL. For
10949 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10950 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10951 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10952 address of the function itself, not a function descriptor, so they
10953 can only be used with functions not using the FDPIC register that
10954 are known to be called directly without a PLT entry. */
10956 function_symbol_result
10957 function_symbol (rtx target, const char *name, sh_function_kind kind)
10959 /* If this is not an ordinary function, the name usually comes from a
10960 string literal or an sprintf buffer. Make sure we use the same
10961 string consistently, so that cse will be able to unify address loads. */
10962 if (kind != FUNCTION_ORDINARY)
10963 name = IDENTIFIER_POINTER (get_identifier (name));
10964 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
10965 rtx lab = const0_rtx;
10966 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10967 if (flag_pic)
10968 switch (kind)
10970 case FUNCTION_ORDINARY:
10971 break;
10972 case SFUNC_GOT:
10974 rtx reg = target ? target : gen_reg_rtx (Pmode);
10976 emit_insn (gen_symGOT2reg (reg, sym));
10977 sym = reg;
10978 break;
10980 case SFUNC_STATIC:
10982 rtx reg = target ? target : gen_reg_rtx (Pmode);
10984 if (TARGET_FDPIC)
10986 /* We use PC-relative calls, since GOTOFF can only refer
10987 to writable data. This works along with sh_sfunc_call. */
10988 lab = PATTERN (gen_call_site ());
10989 emit_insn (gen_sym_label2reg (reg, sym, lab));
10991 else
10993 /* ??? To allow cse to work, we use GOTOFF relocations.
10994 We could add combiner patterns to transform this into
10995 straight pc-relative calls with sym2PIC / bsrf when
10996 label load and function call are still 1:1 and in the
10997 same basic block during combine. */
10998 emit_insn (gen_symGOTOFF2reg (reg, sym));
11001 sym = reg;
11002 break;
11005 if (target && sym != target)
11007 emit_move_insn (target, sym);
11008 return function_symbol_result (target, lab);
11010 return function_symbol_result (sym, lab);
11013 /* Find the number of the first general purpose register in S that
11014 is set. */
11015 static int
11016 scavenge_reg (HARD_REG_SET *s)
11018 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11019 if (TEST_HARD_REG_BIT (*s, r))
11020 return r;
11021 return -1;
11025 sh_get_pr_initial_val (void)
11027 /* If we haven't finished rtl generation, there might be a nonlocal label
11028 that we haven't seen yet.
11029 ??? get_hard_reg_initial_val fails if it is called after register
11030 allocation has started, unless it has been called before for the
11031 same register. And even then, we end up in trouble if we didn't use
11032 the register in the same basic block before. So call
11033 get_hard_reg_initial_val now and wrap it in an unspec if we might
11034 need to replace it. */
11035 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11036 combine can put the pseudo returned by get_hard_reg_initial_val into
11037 instructions that need a general purpose register, which will fail to
11038 be recognized when the pseudo becomes allocated to PR. */
11039 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
11040 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11043 bool
11044 sh_expand_t_scc (rtx operands[])
11046 enum rtx_code code = GET_CODE (operands[1]);
11047 rtx target = operands[0];
11048 rtx op0 = operands[2];
11049 rtx op1 = operands[3];
11050 rtx result = target;
11052 if (!REG_P (op0) || REGNO (op0) != T_REG
11053 || !CONST_INT_P (op1))
11054 return false;
11055 if (!REG_P (result))
11056 result = gen_reg_rtx (SImode);
11057 HOST_WIDE_INT val = INTVAL (op1);
11058 if ((code == EQ && val == 1) || (code == NE && val == 0))
11059 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11060 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11061 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11062 else if (code == EQ || code == NE)
11063 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11064 else
11065 return false;
11066 if (result != target)
11067 emit_move_insn (target, result);
11068 return true;
11071 /* INSN is an sfunc; return the rtx that describes the address used. */
11072 static rtx
11073 extract_sfunc_addr (rtx insn)
11075 rtx pattern = PATTERN (insn);
11076 const int len = XVECLEN (pattern, 0);
11077 for (int i = 0; i < len; i++)
11079 rtx part = XVECEXP (pattern, 0, i);
11080 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11081 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11082 return XEXP (part, 0);
11084 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11085 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11088 /* Verify that the register in use_sfunc_addr still agrees with the address
11089 used in the sfunc. This prevents fill_slots_from_thread from changing
11090 use_sfunc_addr.
11091 INSN is the use_sfunc_addr instruction, and REG is the register it
11092 guards. */
11093 bool
11094 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11096 /* Search for the sfunc. It should really come right after INSN. */
11097 while ((insn = NEXT_INSN (insn)))
11099 if (LABEL_P (insn) || JUMP_P (insn))
11100 break;
11101 if (! INSN_P (insn))
11102 continue;
11104 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11105 insn = seq->insn (0);
11106 if (GET_CODE (PATTERN (insn)) != PARALLEL
11107 || get_attr_type (insn) != TYPE_SFUNC)
11108 continue;
11109 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11111 gcc_unreachable ();
11114 /* This function returns a constant rtx that represents 2**15 / pi in
11115 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
11116 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
11117 static GTY(()) rtx sh_fsca_sf2int_rtx;
11120 sh_fsca_sf2int (void)
11122 if (! sh_fsca_sf2int_rtx)
11124 REAL_VALUE_TYPE rv;
11126 real_from_string (&rv, "10430.378350470453");
11127 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11130 return sh_fsca_sf2int_rtx;
11133 /* This function returns a constant rtx that represents pi / 2**15 in
11134 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11135 of a full circle back to an SFmode angle in radians, i.e. 0x10000
11136 maps to 2*pi. */
11137 static GTY(()) rtx sh_fsca_int2sf_rtx;
11140 sh_fsca_int2sf (void)
11142 if (! sh_fsca_int2sf_rtx)
11144 REAL_VALUE_TYPE rv;
11146 real_from_string (&rv, "9.587379924285257e-5");
11147 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11150 return sh_fsca_int2sf_rtx;
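/* A quick sanity check of the two scale factors above:
     2**15 / pi = 32768 / 3.14159...  ~= 10430.378350470453
     pi / 2**15 = 3.14159... / 32768  ~= 9.587379924285257e-5
   so an angle of pi/2 radians scales to pi/2 * 10430.378 ~= 16384 = 0x4000,
   a quarter of the 0x10000 full circle that fsca expects, and scaling
   0x4000 back by 9.5874e-5 recovers ~1.5708 = pi/2.  */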
11153 /* Initialize the CUMULATIVE_ARGS structure. */
11154 void
11155 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11156 tree fntype,
11157 rtx libname ATTRIBUTE_UNUSED,
11158 tree fndecl,
11159 signed int n_named_args,
11160 machine_mode mode)
11162 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11163 pcum->free_single_fp_reg = 0;
11164 pcum->outgoing = n_named_args != -1;
11166 /* FIXME: Should we check TARGET_HITACHI here ??? */
11167 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11169 if (fntype)
11171 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11172 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11173 pcum->prototype_p = prototype_p (fntype);
11174 pcum->arg_count [(int) SH_ARG_INT] = false;
11176 else
11178 pcum->arg_count [(int) SH_ARG_INT] = 0;
11179 pcum->prototype_p = false;
11180 if (mode != VOIDmode)
11182 /* If the default ABI is the Renesas ABI then all library
11183 calls must assume that the library will be using the
11184 Renesas ABI. So if the function would return its result
11185 in memory then we must force the address of this memory
11186 block onto the stack. Ideally we would like to call
11187 targetm.calls.return_in_memory() here but we do not have
11188 the TYPE or the FNDECL available so we synthesize the
11189 contents of that function as best we can. */
11190 pcum->force_mem =
11191 (TARGET_DEFAULT & MASK_HITACHI)
11192 && (mode == BLKmode
11193 || (GET_MODE_SIZE (mode) > 4
11194 && !(mode == DFmode
11195 && TARGET_FPU_DOUBLE)));
11197 else
11198 pcum->force_mem = false;
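/* A rough example of the libcall case above: on a configuration where the
   Renesas ABI is the default, a libcall returning a DImode value (8 bytes,
   larger than 4 and not a double-FPU DFmode) is assumed to return in
   memory and force_mem is set, whereas an SImode return stays in
   registers.  */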
11203 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11205 enum rtx_code code = TRUNCATE;
11207 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11209 rtx inner = XEXP (x, 0);
11210 machine_mode inner_mode = GET_MODE (inner);
11212 if (inner_mode == mode)
11213 return inner;
11214 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11215 x = inner;
11216 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11217 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11219 code = GET_CODE (x);
11220 x = inner;
11223 return gen_rtx_fmt_e (code, mode, x);
11226 /* Load and store depend on the highpart of the address. However,
11227 set_attr_alternative does not give well-defined results before reload,
11228 so we must look at the rtl ourselves to see if any of the feeding
11229 registers is used in a memref.
11231 Return true iff INSN contains a MEM. */
11232 bool
11233 sh_contains_memref_p (rtx insn)
11235 subrtx_iterator::array_type array;
11236 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11237 if (MEM_P (*iter))
11238 return true;
11239 return false;
11242 /* Return true iff INSN loads a banked register. */
11243 bool
11244 sh_loads_bankedreg_p (rtx insn)
11246 if (GET_CODE (PATTERN (insn)) == SET)
11248 rtx op = SET_DEST (PATTERN(insn));
11249 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11250 return true;
11253 return false;
11256 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11257 static reg_class_t
11258 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11260 return rclass;
11263 /* Implement TARGET_SECONDARY_RELOAD. */
11264 static reg_class_t
11265 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11266 machine_mode mode, secondary_reload_info *sri)
11268 enum reg_class rclass = (enum reg_class) rclass_i;
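/* GBR based displacement addresses can only be used with R0 as the data
   register, e.g.
     mov.l  @(8,gbr),r0
   is valid, but there is no such form for r1..r15.  Hence, reloads
   from/to such addresses into any class other than R0_REGS have to go
   through R0 first.  */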
11270 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11271 && REG_P (XEXP (XEXP (x, 0), 0))
11272 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11273 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11275 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11276 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11278 if (REG_P (x) && REGNO (x) == GBR_REG)
11279 return NO_REGS;
11281 if (in_p)
11283 if (REGCLASS_HAS_FP_REG (rclass)
11284 && immediate_operand ((x), mode)
11285 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11286 switch (mode)
11288 case E_SFmode:
11289 sri->icode = CODE_FOR_reload_insf__frn;
11290 return NO_REGS;
11291 case E_DFmode:
11292 sri->icode = CODE_FOR_reload_indf__frn;
11293 return NO_REGS;
11294 case E_SImode:
11295 /* ??? If we knew that we are in the appropriate mode -
11296 single precision - we could use a reload pattern directly. */
11297 return FPUL_REGS;
11298 default:
11299 abort ();
11301 if (rclass == FPUL_REGS
11302 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11303 || REGNO (x) == T_REG))
11304 || GET_CODE (x) == PLUS))
11305 return GENERAL_REGS;
11306 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11308 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11309 return GENERAL_REGS;
11310 else if (mode == SFmode)
11311 return FP_REGS;
11312 sri->icode = CODE_FOR_reload_insi__i_fpul;
11313 return NO_REGS;
11315 if (rclass == FPSCR_REGS
11316 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11317 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11318 return GENERAL_REGS;
11319 } /* end of input-only processing. */
11321 if (((REGCLASS_HAS_FP_REG (rclass)
11322 && (REG_P (x)
11323 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11324 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11325 && TARGET_FMOVD))))
11326 || (REGCLASS_HAS_GENERAL_REG (rclass)
11327 && REG_P (x)
11328 && FP_REGISTER_P (REGNO (x))))
11329 && (mode == SFmode || mode == SImode))
11330 return FPUL_REGS;
11331 if ((rclass == FPUL_REGS
11332 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11333 && (MEM_P (x)
11334 || (REG_P (x)
11335 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11336 || REGNO (x) == T_REG
11337 || system_reg_operand (x, VOIDmode)))))
11339 if (rclass == FPUL_REGS)
11340 return GENERAL_REGS;
11341 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
11344 if ((rclass == MAC_REGS || rclass == PR_REGS)
11345 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11346 && rclass != REGNO_REG_CLASS (REGNO (x)))
11347 return GENERAL_REGS;
11349 /* If we get here, fall back to loading the FPUL register through general registers.
11350 This case can happen when movsi_ie insn is picked initially to
11351 load/store the FPUL register from/to another register, and then the
11352 other register is allocated on the stack. */
11353 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11354 return GENERAL_REGS;
11356 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11357 the other operand.
11358 On SH2A we could also just leave it alone here, which would result in a
11359 4 byte move insn being generated instead. However, for this to work
11360 the insns must have the appropriate alternatives. */
11361 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11362 && satisfies_constraint_Sdd (x)
11363 && sh_disp_addr_displacement (x)
11364 <= sh_max_mov_insn_displacement (mode, false))
11365 return R0_REGS;
11367 /* When reload is trying to address a QImode or HImode subreg on the stack,
11368 force any subreg byte into R0_REGS, as this is going to become a
11369 displacement address.
11370 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11371 is on the stack, the memref to it might already require a displacement
11372 and that has to be added to the final address. At this point we don't
11373 know the cumulative displacement so we assume the worst case. */
11374 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11375 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11376 return R0_REGS;
11378 return NO_REGS;
11381 /* Return true if SUBST can't safely replace its equivalent during RA. */
11382 static bool
11383 sh_cannot_substitute_mem_equiv_p (rtx)
11385 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11386 uses R0 and may cause spill failure when R0 is already used.
11387 We have to return true for that case at least.
11388 Moreover, SH code has a strong dependency on R0 and does not have
11389 enough hard registers to make the equiv substitution win in size
11390 or speed on average working sets. The pseudos produced to hold the
11391 equiv values can't get good hard registers in the bad cases and end
11392 up as memory save/restore insns, which makes the code worse. */
11393 return true;
11396 /* Return true if DISP can be legitimized. */
11397 static bool
11398 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
11399 machine_mode mode)
11401 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11402 || (TARGET_SH2E && mode == SFmode))
11403 return false;
11405 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
11406 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11408 *disp = adj.mov_disp;
11409 *offs = adj.offset_adjust;
11410 return true;
11413 return false;
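/* A sketch of what the adjustment above achieves, assuming a QImode access
   whose displacement exceeds the 4-bit range of mov.b: a displacement of
   e.g. 60 can be broken into an address adjustment of 48 plus an in-range
   displacement of 12, so that reload can materialize
     add    #48,rX
     mov.b  @(12,rX),r0
   instead of reloading the full address.  The exact split is chosen by
   sh_find_mov_disp_adjust.  */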
11416 /* Return true if a movsf insn should be split with an additional
11417 register. */
11418 bool
11419 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11421 /* op0 == op1 */
11422 if (rtx_equal_p (op0, op1))
11423 return true;
11424 /* fy, FQ, reg */
11425 if (GET_CODE (op1) == CONST_DOUBLE
11426 && ! satisfies_constraint_G (op1)
11427 && ! satisfies_constraint_H (op1)
11428 && REG_P (op0)
11429 && REG_P (op2))
11430 return true;
11431 /* f, r, y */
11432 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11433 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11434 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11435 return true;
11436 /* r, f, y */
11437 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11438 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11439 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11440 return true;
11442 return false;
11445 static void
11446 sh_conditional_register_usage (void)
11448 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11449 if (! VALID_REGISTER_P (regno))
11450 fixed_regs[regno] = call_used_regs[regno] = 1;
11451 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11452 if (flag_pic)
11454 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11455 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11457 if (TARGET_FDPIC)
11459 fixed_regs[PIC_REG] = 1;
11460 call_used_regs[PIC_REG] = 1;
11461 call_really_used_regs[PIC_REG] = 1;
11463 /* Renesas saves and restores mac registers on call. */
11464 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11466 call_really_used_regs[MACH_REG] = 0;
11467 call_really_used_regs[MACL_REG] = 0;
11470 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11471 if (! fixed_regs[regno] && call_really_used_regs[regno])
11472 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11474 call_really_used_regs[FPSCR_MODES_REG] = 0;
11475 call_really_used_regs[FPSCR_STAT_REG] = 0;
11478 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11480 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11481 static bool
11482 sh_legitimate_constant_p (machine_mode mode, rtx x)
11484 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11486 rtx base, offset;
11487 split_const (x, &base, &offset);
11489 if (GET_CODE (base) == SYMBOL_REF
11490 && !offset_within_block_p (base, INTVAL (offset)))
11491 return false;
11494 if (TARGET_FDPIC
11495 && (SYMBOLIC_CONST_P (x)
11496 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11497 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11498 return false;
11500 return GET_CODE (x) != CONST_DOUBLE
11501 || mode == DFmode || mode == SFmode
11502 || mode == DImode || GET_MODE (x) == VOIDmode;
11505 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11507 static void
11508 sh_init_sync_libfuncs (void)
11510 init_sync_libfuncs (UNITS_PER_WORD);
11513 /* Return true if it is appropriate to emit `ret' instructions in the
11514 body of a function. */
11515 bool
11516 sh_can_use_simple_return_p (void)
11518 if (! reload_completed || frame_pointer_needed)
11519 return false;
11521 /* Moving the prologue around doesn't reduce the size. */
11522 if (optimize_function_for_size_p (cfun))
11523 return false;
11525 /* Finally, allow for pr save. */
11526 HARD_REG_SET live_regs_mask;
11527 int d = calc_live_regs (&live_regs_mask);
11529 if (rounded_frame_size (d) > 4)
11530 return false;
11532 return true;
11535 /*------------------------------------------------------------------------------
11536 Address mode optimization support code
11539 typedef HOST_WIDE_INT disp_t;
11540 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11541 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11542 static const disp_t INVALID_DISP = MAX_DISP;
11544 /* A memory reference which is described by a base register and a
11545 displacement. */
11546 class base_reg_disp
11548 public:
11549 base_reg_disp (rtx br, disp_t d);
11551 bool is_reg (void) const;
11552 bool is_disp (void) const;
11553 rtx reg (void) const;
11554 disp_t disp (void) const;
11556 private:
11557 rtx reg_;
11558 disp_t disp_;
11561 inline
11562 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11563 : reg_ (br), disp_ (d)
11567 inline bool
11568 base_reg_disp::is_reg (void) const
11570 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11573 inline bool
11574 base_reg_disp::is_disp (void) const
11576 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11579 inline rtx
11580 base_reg_disp::reg (void) const
11582 return reg_;
11585 inline disp_t
11586 base_reg_disp::disp (void) const
11588 return disp_;
11591 /* Find the base register and calculate the displacement for a given
11592 address rtx 'x'. */
11593 static base_reg_disp
11594 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11595 rtx base_reg = NULL)
11597 if (REG_P (x))
11599 if (REGNO (x) == GBR_REG)
11600 return base_reg_disp (x, disp);
11602 /* We've reached a hard-reg. This is probably the point where
11603 function args are copied to pseudos. Do not go any further and
11604 stick to the pseudo. If the original mem addr was in a hard reg
11605 from the beginning, it will become the base reg. */
11606 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11607 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11609 /* Find the def of the reg and trace it. If there are multiple
11610 defs and they are not all the same, assume it's not safe to proceed. */
11611 rtx_insn* last_i = NULL;
11612 rtx last_set = NULL;
11613 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11614 d = DF_REF_NEXT_REG (d))
11616 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11618 /* Accept multiple defs, as long as they are equal. */
11619 if (last_set == NULL || rtx_equal_p (last_set, set))
11621 last_i = DF_REF_INSN (d);
11622 last_set = set;
11624 else
11626 last_i = NULL;
11627 last_set = NULL;
11628 break;
11632 if (last_set != NULL && last_i != NULL)
11633 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11634 XEXP (last_set, 0));
11636 /* If we get here, no previous insn was found that sets the reg.
11637 The input reg is already the base reg. */
11638 return base_reg_disp (x, disp);
11641 else if (GET_CODE (x) == PLUS)
11643 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11644 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11646 /* Either left or right val must be a reg.
11647 We don't handle the case of 'reg + reg' here. */
11648 if (left_val.is_reg () && right_val.is_disp ())
11649 return base_reg_disp (left_val.reg (), left_val.disp ()
11650 + right_val.disp () + disp);
11651 else if (right_val.is_reg () && left_val.is_disp ())
11652 return base_reg_disp (right_val.reg (), right_val.disp ()
11653 + left_val.disp () + disp);
11654 else
11655 return base_reg_disp (base_reg, disp);
11658 else if (CONST_INT_P (x))
11659 return base_reg_disp (NULL, disp + INTVAL (x));
11661 /* Didn't find anything useful. */
11662 return base_reg_disp (base_reg, disp);
11665 /* Given an insn and a memory operand, try to find an equivalent GBR
11666 based memory address and return the corresponding new memory address.
11667 Return NULL_RTX if not found. */
11669 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11671 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11672 return NULL_RTX;
11674 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11675 if (side_effects_p (XEXP (mem, 0)))
11676 return NULL_RTX;
11678 /* When not optimizing there might be no dataflow available. */
11679 if (df == NULL)
11680 return NULL_RTX;
11682 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11684 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11686 /* If GBR is marked as call clobbered we bail out if we see a call.
11687 FIXME: Actually should check if this mem refers to the gbr value
11688 before or after the call. If there is a store_gbr preceding this
11689 mem, it's safe to use GBR for this mem.
11691 If GBR is not marked as call clobbered, but there is some other
11692 def than a call, it's probably a load_gbr upon which we also
11693 bail out to be on the safe side.
11694 FIXME: Should check if we have a use-after-def case, such as
11695 the call case above. */
11696 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11697 d = DF_REF_NEXT_REG (d))
11699 if (CALL_P (DF_REF_INSN (d)))
11701 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
11702 return NULL_RTX;
11703 else
11704 continue;
11706 else
11707 return NULL_RTX;
11710 rtx disp = GEN_INT (gbr_disp.disp ());
11711 if (gbr_displacement (disp, GET_MODE (mem)))
11712 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11715 return NULL_RTX;
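/* Example of the kind of sequence the two functions above handle: given
     rA = gbr
     rB = rA + 4
     ... = mem[rB + 8]
   the base register of the mem address traces back to GBR with a total
   displacement of 12, so the address can be rewritten as @(12,gbr),
   provided 12 is a valid gbr_displacement for the access mode.  */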
11718 /*------------------------------------------------------------------------------
11719 Manual insn combine support code.
11722 /* Return true if the specified insn contains any UNSPECs or
11723 UNSPEC_VOLATILEs. */
11724 static bool
11725 sh_unspec_insn_p (rtx x)
11727 subrtx_iterator::array_type array;
11728 FOR_EACH_SUBRTX (i, array, x, ALL)
11729 if (*i != NULL
11730 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11731 return true;
11733 return false;
11736 /* Return true if the register operands of the specified insn are modified
11737 between the specified from and to insns (exclusive of those two). */
11738 bool
11739 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11740 const rtx_insn* from,
11741 const rtx_insn* to)
11743 /* FIXME: Return true for multiple sets for now. */
11744 rtx s = single_set (operands_insn);
11745 if (s == NULL_RTX)
11746 return true;
11748 subrtx_iterator::array_type array;
11749 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11750 if (*i != NULL &&
11751 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11752 return true;
11754 return false;
11757 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11758 negates the T bit and stores the result in the T bit. */
11759 bool
11760 sh_is_nott_insn (const rtx_insn* i)
11762 return i != NULL && GET_CODE (PATTERN (i)) == SET
11763 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11764 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11768 sh_movt_set_dest (const rtx_insn* i)
11770 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11774 sh_movt_set_dest (const_rtx pat)
11776 return GET_CODE (pat) == SET
11777 && arith_reg_dest (XEXP (pat, 0), SImode)
11778 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11781 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11782 that stores the negated T bit in a register, and return the destination
11783 register rtx, or null. */
11785 sh_movrt_set_dest (const rtx_insn* i)
11787 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11791 sh_movrt_set_dest (const_rtx pat)
11793 /* The negc movrt replacement is inside a parallel. */
11794 if (GET_CODE (pat) == PARALLEL)
11795 pat = XVECEXP (pat, 0, 0);
11797 return GET_CODE (pat) == SET
11798 && arith_reg_dest (XEXP (pat, 0), SImode)
11799 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11803 /* Given an insn and a reg number, tell whether the reg dies or is unused
11804 after the insn. */
11805 bool
11806 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11808 return find_regno_note (i, REG_DEAD, regno) != NULL
11809 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11812 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11813 mark it as being used after the insn. */
11814 void
11815 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11817 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11818 remove_note (i, n);
11819 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11820 remove_note (i, n);
11823 /* Given an insn check if it contains any post/pre inc/dec mem operands and
11824 add the REG_INC notes accordingly.
11825 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
11826 FIXME: This function is currently used by peephole2 patterns because
11827 the peephole2 pass does not preserve REG_INC notes. If the notes
11828 are dropped the following passes will do wrong things. */
11829 rtx_insn*
11830 sh_check_add_incdec_notes (rtx_insn* i)
11832 struct for_each_inc_dec_clb
11834 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11835 rtx dest, rtx src ATTRIBUTE_UNUSED,
11836 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11838 gcc_assert (REG_P (dest));
11840 rtx_insn* i = (rtx_insn*)arg;
11841 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11842 add_reg_note (i, REG_INC, dest);
11844 return 0;
11848 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11849 return i;
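/* For instance, after a peephole2 emits
     (set (reg:SI r1) (mem:SI (post_inc:SI (reg:SI r4))))
   the function above re-attaches a REG_INC note for r4 that the peephole2
   pass dropped, so later passes keep treating r4 as auto-incremented.  */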
11852 /* Given a move insn destination and a source, make sure that the move source
11853 operand is not a post-inc mem load with the same address reg as the
11854 destination. Returns the modified source operand with the post-inc removed
11855 if necessary. */
11857 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11859 if (!MEM_P (src))
11860 return src;
11862 rtx addr = XEXP (src, 0);
11864 if (GET_CODE (addr) == POST_INC
11865 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11866 return replace_equiv_address (src, XEXP (addr, 0));
11868 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11869 return src;
11872 /* Emit a move insn that is safe to be used in peephole patterns. */
11873 rtx_insn*
11874 sh_peephole_emit_move_insn (rtx dst, rtx src)
11876 return sh_check_add_incdec_notes (
11877 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11880 /* Given an op rtx and an insn, try to find out whether the result of the
11881 specified op consists only of logical operations on T bit stores. */
11882 bool
11883 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11885 if (!logical_operator (op, SImode))
11886 return false;
11888 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11889 int op_is_t_count = 0;
11891 for (int i = 0; i < 2; ++i)
11893 if (t_reg_operand (ops[i], VOIDmode)
11894 || negt_reg_operand (ops[i], VOIDmode))
11895 op_is_t_count++;
11897 else
11899 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
11900 prev_nonnote_insn_bb);
11901 if (op_set.set_src == NULL_RTX)
11902 continue;
11904 if (t_reg_operand (op_set.set_src, VOIDmode)
11905 || negt_reg_operand (op_set.set_src, VOIDmode)
11906 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11907 op_is_t_count++;
11911 return op_is_t_count == 2;
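/* For example, (ior (reg T) (reg rA)) where rA was itself set from the
   T bit in a dominating insn counts both operands as T bit stores and
   the function returns true.  */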
11914 /* Given the operand that is extended in a sign/zero extend insn, and the
11915 insn, try to figure out whether the sign/zero extension can be replaced
11916 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11917 NULL_RTX otherwise. */
11919 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11921 if (REG_P (extended_op))
11922 extended_op = extended_op;
11923 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11924 extended_op = SUBREG_REG (extended_op);
11925 else
11926 return NULL_RTX;
11928 /* Reg moves must be of the same mode. */
11929 if (GET_MODE (extended_op) != SImode)
11930 return NULL_RTX;
11932 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
11933 if (s.set_src == NULL_RTX)
11934 return NULL_RTX;
11936 if (t_reg_operand (s.set_src, VOIDmode)
11937 || negt_reg_operand (s.set_src, VOIDmode))
11938 return extended_op;
11940 /* If the zero extended reg was formed by a logical operation, check the
11941 operands of the logical operation. If both originated from T bit
11942 stores the zero extension can be eliminated. */
11943 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11944 return extended_op;
11946 return NULL_RTX;
11949 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11950 figure out whether it should be converted into a movt-xor sequence in
11951 the movrt_negc splitter.
11952 Returns true if insns have been modified and the splitter has succeeded. */
11953 bool
11954 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11956 /* In cases such as
11957 tst r4,r4
11958 mov #-1,r1
11959 negc r1,r1
11960 tst r4,r4
11961 we can replace the T bit clobbering negc with a movt-xor sequence and
11962 eliminate the redundant comparison.
11963 Because the xor insn depends on register allocation results, allow this
11964 only before reload. */
11965 if (!can_create_pseudo_p ())
11966 return false;
11968 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
11969 prev_nonnote_insn_bb);
11970 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
11971 next_nonnote_insn_bb);
11973 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
11974 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
11975 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11976 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
11977 t_before_negc.insn,
11978 t_after_negc.insn)
11979 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11980 && !sh_unspec_insn_p (t_after_negc.insn)
11981 && !volatile_insn_p (PATTERN (t_after_negc.insn))
11982 && !side_effects_p (PATTERN (t_after_negc.insn))
11983 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
11985 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
11986 set_insn_deleted (t_after_negc.insn);
11987 return true;
11989 else
11990 return false;
11993 /* Given a reg and the current insn, see if the value of the reg originated
11994 from a sign or zero extension and return the discovered information. */
11995 sh_extending_set_of_reg
11996 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
11998 if (reg == NULL)
11999 return sh_extending_set_of_reg (curr_insn);
12001 if (SUBREG_P (reg))
12002 reg = SUBREG_REG (reg);
12004 if (!REG_P (reg))
12005 return sh_extending_set_of_reg (curr_insn);
12007 /* FIXME: Also search the predecessor basic blocks. It seems that checking
12008 only the adjacent predecessor blocks would cover most of the cases.
12009 Also try to look through the first extension that we hit. There are some
12010 cases where a zero_extend is followed by an (implicit) sign_extend, and it
12011 fails to see the sign_extend. */
12012 sh_extending_set_of_reg result =
12013 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
12015 if (result.set_src != NULL)
12017 if (GET_CODE (result.set_src) == SIGN_EXTEND
12018 || GET_CODE (result.set_src) == ZERO_EXTEND)
12020 if (dump_file)
12021 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12022 "explicitly sign/zero extended in insn %d\n",
12023 REGNO (reg), INSN_UID (result.insn));
12024 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
12025 result.ext_code = GET_CODE (result.set_src);
12027 else if (MEM_P (result.set_src)
12028 && (GET_MODE (result.set_src) == QImode
12029 || GET_MODE (result.set_src) == HImode)
12030 && !sh_unspec_insn_p (result.insn))
12032 /* On SH QIHImode memory loads always sign extend. However, in
12033 some cases where it seems that the higher bits are not
12034 interesting, the loads will not be expanded as sign extending
12035 insns, but as QIHImode loads into QIHImode regs. We report that
12036 the reg has been sign extended by the mem load. When it is used
12037 as such, we must convert the mem load into a sign extending insn,
12038 see also sh_extending_set_of_reg::use_as_extended_reg. */
12039 if (dump_file)
12040 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12041 "implicitly sign extended in insn %d\n",
12042 REGNO (reg), INSN_UID (result.insn));
12043 result.from_mode = GET_MODE (result.set_src);
12044 result.ext_code = SIGN_EXTEND;
12048 return result;
12051 /* Given a reg that is known to be sign or zero extended at some insn,
12052 take the appropriate measures so that the extended value can be used as
12053 a reg at the specified insn and return the resulting reg rtx. */
12055 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
12057 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
12058 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
12059 gcc_assert (from_mode == QImode || from_mode == HImode);
12061 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
12063 if (dump_file)
12064 fprintf (dump_file,
12065 "use_as_extended_reg: converting non-extending mem load in "
12066 "insn %d into sign-extending load\n", INSN_UID (insn));
12068 rtx r = gen_reg_rtx (SImode);
12069 rtx_insn* i0;
12070 if (from_mode == QImode)
12071 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
12072 else if (from_mode == HImode)
12073 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
12074 else
12075 gcc_unreachable ();
12077 emit_insn_after (
12078 gen_move_insn (XEXP (set_rtx, 0),
12079 gen_lowpart (GET_MODE (set_src), r)), i0);
12080 set_insn_deleted (insn);
12081 return r;
12083 else
12085 rtx extension_dst = XEXP (set_rtx, 0);
12086 if (GET_MODE (extension_dst) != SImode)
12087 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12088 GET_MODE (extension_dst), 0);
12089 if (modified_between_p (extension_dst, insn, use_at_insn))
12091 if (dump_file)
12092 fprintf (dump_file,
12093 "use_as_extended_reg: dest reg %d of extending insn %d is "
12094 "modified, inserting a reg-reg copy\n",
12095 REGNO (extension_dst), INSN_UID (insn));
12097 rtx r = gen_reg_rtx (SImode);
12098 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12099 return r;
12101 else
12103 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12104 return extension_dst;
12109 bool
12110 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12112 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12113 && (from_mode == QImode || from_mode == HImode)
12114 && set_src != NULL)
12115 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12116 else
12117 return false;
12121 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12123 gcc_assert (can_use_as_unextended_reg ());
12125 rtx r = XEXP (set_src, 0);
12126 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12128 if (modified_between_p (r, insn, use_at_insn))
12130 rtx r1 = gen_reg_rtx (SImode);
12131 emit_insn_after (gen_move_insn (r1, r0), insn);
12132 return r1;
12134 else
12136 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12137 ? REGNO (SUBREG_REG (r))
12138 : REGNO (r));
12139 return r0;
12143 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12144 perform the necessary checks on the operands and split it accordingly. */
12145 void
12146 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12147 int subreg_offset, rtx operands[])
12149 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12151 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12152 curr_insn);
12153 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12154 curr_insn);
12156 /* If one of the operands is known to be zero extended, that's already
12157 sufficient to mask out the unwanted high bits. */
12158 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12160 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12161 operands[1]));
12162 return;
12164 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12166 emit_insn (gen_tstsi_t (operands[0],
12167 eop1.use_as_extended_reg (curr_insn)));
12168 return;
12171 /* None of the operands seem to be zero extended.
12172 If both are sign extended it's OK, too. */
12173 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12174 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12176 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12177 eop1.use_as_extended_reg (curr_insn)));
12178 return;
12181 /* Otherwise we have to insert a zero extension on one of the operands to
12182 mask out the unwanted high bits.
12183 Prefer the operand that has no known extension. */
12184 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12185 std::swap (operands[0], operands[1]);
12187 rtx tmp0 = gen_reg_rtx (SImode);
12188 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12189 GET_MODE (operands[0]), subreg_offset);
12190 emit_insn (subreg_mode == QImode
12191 ? gen_zero_extendqisi2 (tmp0, tmp1)
12192 : gen_zero_extendhisi2 (tmp0, tmp1));
12193 emit_insn (gen_tstsi_t (tmp0, operands[1]));
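/* A small example of the split above, assuming QImode subregs: for
   (tst (subreg:QI rA) (subreg:QI rB)), if rA is known to be zero extended
   from QImode the high bits cannot affect the AND result and a plain
   tstsi_t rA,rB is emitted; otherwise one operand is explicitly zero
   extended into a temporary first and tested against the other.  */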
12196 /* A helper class to increment/decrement a counter variable each time a
12197 function is entered/left. */
12198 class scope_counter
12200 public:
12201 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12203 ~scope_counter (void)
12205 --m_counter;
12206 gcc_assert (m_counter >= 0);
12209 int count (void) const { return m_counter; }
12211 private:
12212 int& m_counter;
12215 /* Given an rtx x, determine whether the expression can be used to create
12216 an insn that calculates x and stores the result in the T bit.
12217 This is used by the 'treg_set_expr' predicate to construct insns sequences
12218 where T bit results are fed into other insns, such as addc, subc, negc
12219 insns.
12221 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12222 distinguish between 'positive' and 'negative' forms. For now this has to
12223 be done in the preparation code. We could also introduce
12224 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12225 two different patterns for the 'positive' and 'negative' forms. However,
12226 the total amount of lines of code seems to be about the same and the
12227 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12228 recog function would need to look inside the expression by temporarily
12229 splitting it. */
12230 static int sh_recog_treg_set_expr_reent_count = 0;
12232 bool
12233 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12235 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12237 /* Limit the recursion count to avoid nested expressions which we can't
12238 resolve to a single treg set insn. */
12239 if (recursion.count () > 1)
12240 return false;
12242 /* Early accept known possible operands before doing recog. */
12243 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12244 || negt_reg_operand (op, mode))
12245 return true;
12247 /* Early reject impossible operands before doing recog.
12248 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12249 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12250 such as lower-subreg will bail out. Some insns such as SH4A movua are
12251 done with UNSPEC, so we must reject those too, or else it would result
12252 in an invalid reg -> treg move. */
12253 if (CONST_INT_P (op) || register_operand (op, mode)
12254 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12255 return false;
12257 if (!can_create_pseudo_p ())
12258 return false;
12260 /* expand_debug_locations may call this to compute rtx costs at
12261 a very early stage. In that case, don't make new insns here to
12262 avoid codegen differences with -g. */
12263 if (currently_expanding_to_rtl)
12264 return false;
12266 /* We are going to invoke recog in a re-entrant way and thus
12267 have to capture its current state and restore it afterwards. */
12268 recog_data_d prev_recog_data = recog_data;
12270 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12271 SET_PREV_INSN (i) = NULL;
12272 SET_NEXT_INSN (i) = NULL;
12274 /* If the comparison op doesn't have a result mode, set it to SImode. */
12275 machine_mode prev_op_mode = GET_MODE (op);
12276 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12277 PUT_MODE (op, SImode);
12279 int result = recog (PATTERN (i), i, 0);
12281 /* It seems there is no insn like that. Create a negated version and
12282 try again. If we hit a negated form, we'll allow that and append a
12283 nott sequence when splitting out the insns. Insns that do the split
12284 can then remove the trailing nott if they know how to deal with it. */
12285 if (result < 0 && COMPARISON_P (op))
12287 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12288 if (cmp_mode == VOIDmode)
12289 cmp_mode = GET_MODE (XEXP (op, 1));
12291 rtx_code prev_code = GET_CODE (op);
12292 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12293 result = recog (PATTERN (i), i, 0);
12294 PUT_CODE (op, prev_code);
12297 PUT_MODE (op, prev_op_mode);
12298 recog_data = prev_recog_data;
12299 return result >= 0;
12302 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12303 This can be used as a condition for insn/split patterns to allow certain
12304 T bit setting patterns only to be matched as sub-expressions of other
12305 patterns. */
12306 bool
12307 sh_in_recog_treg_set_expr (void)
12309 return sh_recog_treg_set_expr_reent_count > 0;
12312 /* Given an rtx x, which is assumed to be some expression that has been
12313 matched by the 'treg_set_expr' predicate before, split and emit the
12314 insns that are necessary to calculate the expression and store the result
12315 in the T bit.
12316 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
12317 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12318 'delete_insn' which then causes the DF parts to bail out, because we
12319 currently are inside another gen_split* function and would invoke
12320 'try_split' in a reentrant way. */
12321 static std::pair<rtx_insn*, rtx_insn*>
12322 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12324 if (dump_file)
12326 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12327 print_rtl_single (dump_file, i);
12328 fprintf (dump_file, "\n");
12331 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12333 if (seq == NULL)
12334 return std::make_pair (i, i);
12336 /* Avoid infinite splitter loops if any insn of the result matches
12337 the original pattern. */
12338 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12339 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12340 return std::make_pair (i, i);
12342 unshare_all_rtl_in_chain (seq);
12344 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12345 a linked list, replace the single insn with the new insns. */
12346 rtx_insn* seqlast = seq;
12347 while (NEXT_INSN (seqlast) != NULL)
12348 seqlast = NEXT_INSN (seqlast);
12350 if (rtx_insn* iprev = PREV_INSN (i))
12351 SET_NEXT_INSN (iprev) = seq;
12352 if (rtx_insn* inext = NEXT_INSN (i))
12353 SET_PREV_INSN (inext) = seqlast;
12355 SET_PREV_INSN (seq) = PREV_INSN (i);
12356 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12358 SET_PREV_INSN (i) = NULL;
12359 SET_NEXT_INSN (i) = NULL;
12361 /* Recursively split all insns. */
12362 for (i = seq; ; i = NEXT_INSN (i))
12364 std::pair<rtx_insn*, rtx_insn*> ii =
12365 sh_try_split_insn_simple (i, curr_insn, n + 1);
12366 if (i == seq)
12367 seq = ii.first;
12368 if (i == seqlast)
12370 seqlast = ii.second;
12371 break;
12373 i = ii.first;
12376 return std::make_pair (seq, seqlast);
12379 sh_treg_insns
12380 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12382 if (t_reg_operand (x, VOIDmode))
12383 return sh_treg_insns ();
12385 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12387 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12388 SET_PREV_INSN (i) = NULL;
12389 SET_NEXT_INSN (i) = NULL;
12391 if (dump_file)
12393 fprintf (dump_file, "split_treg_set_expr insn:\n");
12394 print_rtl (dump_file, i);
12395 fprintf (dump_file, "\n");
12398 /* If the insn is not found, we will try a negated form and append
12399 a nott. */
12400 bool append_nott = false;
12402 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12403 have to capture its current state and restore it afterwards. */
12404 recog_data_d prev_recog_data = recog_data;
12406 if (negt_reg_operand (x, GET_MODE (x)))
12408 /* This is a normal movt followed by a nott. It will be converted
12409 into a movrt after initial expansion. */
12410 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12411 append_nott = true;
12413 else
12415 /* If the comparison op doesn't have a mode set, set it to SImode. */
12416 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12417 PUT_MODE (x, SImode);
12419 int insn_code = recog (PATTERN (i), i, 0);
12421 if (insn_code < 0 && COMPARISON_P (x))
12423 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12424 if (cmp_mode == VOIDmode)
12425 cmp_mode = GET_MODE (XEXP (x, 1));
12427 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12428 insn_code = recog (PATTERN (i), i, 0);
12429 append_nott = true;
12432 gcc_assert (insn_code >= 0);
12435 /* Try to recursively split the insn. Some insns might refuse to split
12436 any further while we are in the treg_set_expr splitting phase. They
12437 will be emitted as part of the outer insn and then split again. */
12438 std::pair<rtx_insn*, rtx_insn*> insnlist =
12439 sh_try_split_insn_simple (i, curr_insn);
12441 /* Restore recog state. */
12442 recog_data = prev_recog_data;
12444 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12445 ? insnlist.second
12446 : NULL;
12447 if (dump_file)
12449 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12450 print_rtl (dump_file, insnlist.first);
12451 fprintf (dump_file, "\n");
12453 if (nott_insn != NULL)
12454 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12457 emit_insn (insnlist.first);
12459 if (nott_insn != NULL && append_nott)
12461 if (dump_file)
12462 fprintf (dump_file, "removing trailing nott\n");
12463 remove_insn (nott_insn);
12464 nott_insn = NULL;
12465 append_nott = false;
12468 if (append_nott)
12469 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12471 rtx_insn* first_insn = get_insns ();
12473 if (dump_file)
12475 fprintf (dump_file, "resulting insns:\n");
12476 print_rtl (dump_file, first_insn);
12477 fprintf (dump_file, "\n");
12480 return sh_treg_insns (first_insn, nott_insn);
12483 /*------------------------------------------------------------------------------
12484 Mode switching support code.
12487 static void
12488 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12489 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12491 if ((TARGET_SH4A_FP || TARGET_SH4_300)
12492 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12494 emit_insn (gen_toggle_pr ());
12495 if (TARGET_FMOVD)
12496 emit_insn (gen_toggle_sz ());
12498 else if (mode != FP_MODE_NONE)
12500 rtx tmp = gen_reg_rtx (SImode);
12501 emit_insn (gen_sts_fpscr (tmp));
12502 rtx i = NULL;
12504 const unsigned HOST_WIDE_INT fpbits =
12505 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12507 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12508 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12509 else if (mode == FP_MODE_SINGLE)
12510 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12511 else if (mode == FP_MODE_DOUBLE)
12512 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12513 else
12514 gcc_unreachable ();
12516 emit_insn (i);
12517 emit_insn (gen_lds_fpscr (tmp));
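/* The non-SH4A path above is thus a read-modify-write of FPSCR; e.g.
   entering double precision mode with -mfmovd roughly becomes
     sts    fpscr,rX
     mov.l  .Lbits,rY   ! FPSCR_PR | FPSCR_SZ
     or     rY,rX
     lds    rX,fpscr
   while the SH4A / SH4-300 path toggles the bits directly (fpchg for PR,
   fschg for SZ).  */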
12521 static int
12522 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12524 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12527 static int
12528 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12530 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
12531 get_attr_fp_set (insn) != FP_SET_NONE)
12532 return (int) get_attr_fp_set (insn);
12533 else
12534 return mode;
12537 static int
12538 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12540 return NORMAL_MODE (entity);
12543 static int
12544 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12546 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12549 static int
12550 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12552 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12555 /*------------------------------------------------------------------------------
12556 Misc
12559 /* Return true if we use LRA instead of reload pass. */
12560 bool
12561 sh_lra_p (void)
12563 return sh_lra_flag;
12566 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12568 static bool
12569 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12570 unsigned int align,
12571 enum by_pieces_operation op,
12572 bool speed_p)
12574 switch (op)
12576 case MOVE_BY_PIECES:
12577 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12578 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12579 case STORE_BY_PIECES:
12580 case SET_BY_PIECES:
12581 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12582 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12583 default:
12584 return default_use_by_pieces_infrastructure_p (size, align,
12585 op, speed_p);
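/* Worked example for the thresholds above, assuming MOVE_MAX_PIECES is 4:
   a 16 byte block move with 32 bit alignment takes 4 SImode pieces, so
   when optimizing for speed (4 < 16) it is expanded inline by pieces,
   while when optimizing for size (4 < 2 is false) the move is left to the
   movmem expander / library call instead.  */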
12589 bool
12590 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12591 rtx x ATTRIBUTE_UNUSED)
12593 return TARGET_FDPIC;
12596 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12597 function descriptor) into r1 and the GOT address into r12,
12598 returning an rtx for r1. */
12601 sh_load_function_descriptor (rtx funcdesc)
12603 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12604 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12605 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12606 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12608 emit_move_insn (r1, fnaddr);
12609 /* The ABI requires the entry point address to be loaded first, so
12610 prevent the load from being moved after that of the GOT
12611 address. */
12612 emit_insn (gen_blockage ());
12613 emit_move_insn (pic_reg, gotaddr);
12614 return r1;
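/* In other words, an FDPIC function descriptor is a two word object
   { entry address, callee GOT pointer }.  The sequence above loads the
   word at offset 0 into r1 and the word at offset 4 into r12, separated
   by a blockage so the order is preserved, after which the caller can
   simply do 'jsr @r1' with r12 already holding the callee's GOT
   address.  */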
12617 /* Return an rtx holding the initial value of the FDPIC register (the
12618 FDPIC pointer passed in from the caller). */
12621 sh_get_fdpic_reg_initial_val (void)
12623 return get_hard_reg_initial_val (Pmode, PIC_REG);
12626 #include "gt-sh.h"