Turn CANNOT_CHANGE_MODE_CLASS into a hook
[official-gcc.git] / gcc / config / sh / sh.c
blobfa9f9ad17a6cdceaff3260d631d3a6ee21c4b27e
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2017 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include <sstream>
24 #include "config.h"
25 #define INCLUDE_VECTOR
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "gimple.h"
33 #include "cfghooks.h"
34 #include "df.h"
35 #include "memmodel.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "diagnostic-core.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "stor-layout.h"
46 #include "calls.h"
47 #include "varasm.h"
48 #include "flags.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "reload.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "dwarf2.h"
55 #include "langhooks.h"
56 #include "cfgrtl.h"
57 #include "intl.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "tm-constrs.h"
61 #include "opts.h"
62 #include "tree-pass.h"
63 #include "context.h"
64 #include "builtins.h"
65 #include "rtl-iter.h"
66 #include "regs.h"
68 /* This file should be included last. */
69 #include "target-def.h"
71 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
73 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
74 #define GEN_MOV (*(gen_movsi))
75 #define GEN_ADD3 (*(gen_addsi3))
76 #define GEN_SUB3 (*(gen_subsi3))
78 /* Used to simplify the logic below. Find the attributes wherever
79 they may be. */
80 #define SH_ATTRIBUTES(decl) \
81 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
82 : DECL_ATTRIBUTES (decl) \
83 ? (DECL_ATTRIBUTES (decl)) \
84 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
86 /* Set to true by expand_prologue() when the function is an
87 interrupt handler. */
88 bool current_function_interrupt;
90 tree sh_deferred_function_attributes;
91 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
93 /* Global variables for machine-dependent things. */
95 /* Which cpu are we scheduling for. */
96 enum processor_type sh_cpu;
98 /* Definitions used in ready queue reordering for first scheduling pass. */
100 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
101 static short *regmode_weight[2];
103 /* Total SFmode and SImode weights of scheduled insns. */
104 static int curr_regmode_pressure[2];
106 /* Number of r0 life regions. */
107 static int r0_life_regions;
109 /* If true, skip cycles for Q -> R movement. */
110 static int skip_cycles = 0;
112 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
113 and returned from sh_reorder2. */
114 static short cached_can_issue_more;
116 /* Unique number for UNSPEC_BBR pattern. */
117 static unsigned int unspec_bbr_uid = 1;
119 /* Provides the class number of the smallest class containing
120 reg number. */
121 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
123 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
156 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
157 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
158 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
159 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
160 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
161 GENERAL_REGS, GENERAL_REGS,
164 char sh_register_names[FIRST_PSEUDO_REGISTER] \
165 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
167 char sh_additional_register_names[ADDREGNAMES_SIZE] \
168 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
169 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
171 int assembler_dialect;
173 static void split_branches (rtx_insn *);
174 static int branch_dest (rtx);
175 static void print_slot (rtx_sequence *);
176 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
177 static void dump_table (rtx_insn *, rtx_insn *);
178 static bool broken_move (rtx_insn *);
179 static bool mova_p (rtx_insn *);
180 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
181 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
182 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
183 static void sh_reorg (void);
184 static void sh_option_override (void);
185 static void sh_override_options_after_change (void);
186 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
187 static rtx_insn* emit_frame_insn (rtx);
188 static rtx push (int);
189 static void pop (int);
190 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
191 static int calc_live_regs (HARD_REG_SET *);
192 static HOST_WIDE_INT rounded_frame_size (int);
193 static bool sh_frame_pointer_required (void);
194 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
195 static int sh_mode_needed (int, rtx_insn *);
196 static int sh_mode_after (int, int, rtx_insn *);
197 static int sh_mode_entry (int);
198 static int sh_mode_exit (int);
199 static int sh_mode_priority (int entity, int n);
201 static rtx mark_constant_pool_use (rtx);
202 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
203 int, bool *);
204 static tree sh_handle_resbank_handler_attribute (tree *, tree,
205 tree, int, bool *);
206 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
207 tree, int, bool *);
208 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
209 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
210 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
211 static void sh_print_operand (FILE *, rtx, int);
212 static void sh_print_operand_address (FILE *, machine_mode, rtx);
213 static bool sh_print_operand_punct_valid_p (unsigned char code);
214 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
215 static void sh_output_function_epilogue (FILE *);
216 static void sh_insert_attributes (tree, tree *);
217 static const char *sh_check_pch_target_flags (int);
218 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
219 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
220 static int sh_issue_rate (void);
221 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
222 static short find_set_regmode_weight (rtx, machine_mode);
223 static short find_insn_regmode_weight (rtx, machine_mode);
224 static void find_regmode_weight (basic_block, machine_mode);
225 static int find_r0_life_regions (basic_block);
226 static void sh_md_init_global (FILE *, int, int);
227 static void sh_md_finish_global (FILE *, int);
228 static int rank_for_reorder (const void *, const void *);
229 static void swap_reorder (rtx_insn **, int);
230 static void ready_reorder (rtx_insn **, int);
231 static bool high_pressure (machine_mode);
232 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
233 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
234 static void sh_md_init (FILE *, int, int);
235 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
237 static bool sh_function_ok_for_sibcall (tree, tree);
239 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
240 static bool sh_ms_bitfield_layout_p (const_tree);
242 static void sh_init_builtins (void);
243 static tree sh_builtin_decl (unsigned, bool);
244 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
245 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
246 HOST_WIDE_INT, tree);
247 static void sh_file_start (void);
248 static bool sh_assemble_integer (rtx, unsigned int, int);
249 static bool flow_dependent_p (rtx, rtx);
250 static void flow_dependent_p_1 (rtx, const_rtx, void *);
251 static int shiftcosts (rtx);
252 static int and_xor_ior_costs (rtx, int);
253 static int addsubcosts (rtx);
254 static int multcosts (rtx);
255 static bool unspec_caller_rtx_p (rtx);
256 static bool sh_cannot_copy_insn_p (rtx_insn *);
257 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
258 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
259 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
260 static int sh_pr_n_sets (void);
261 static rtx sh_allocate_initial_value (rtx);
262 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
263 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
264 machine_mode,
265 struct secondary_reload_info *);
266 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
267 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
268 static rtx sh_delegitimize_address (rtx);
269 static bool sh_cannot_substitute_mem_equiv_p (rtx);
270 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
271 static int scavenge_reg (HARD_REG_SET *s);
273 static rtx sh_struct_value_rtx (tree, int);
274 static rtx sh_function_value (const_tree, const_tree, bool);
275 static bool sh_function_value_regno_p (const unsigned int);
276 static rtx sh_libcall_value (machine_mode, const_rtx);
277 static bool sh_return_in_memory (const_tree, const_tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
280 tree, int *, int);
281 static bool sh_strict_argument_naming (cumulative_args_t);
282 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
283 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
284 static tree sh_build_builtin_va_list (void);
285 static void sh_va_start (tree, rtx);
286 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
287 static bool sh_promote_prototypes (const_tree);
288 static machine_mode sh_promote_function_mode (const_tree type,
289 machine_mode,
290 int *punsignedp,
291 const_tree funtype,
292 int for_return);
293 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
294 const_tree, bool);
295 static bool sh_callee_copies (cumulative_args_t, machine_mode,
296 const_tree, bool);
297 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
298 tree, bool);
299 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
300 const_tree, bool);
301 static rtx sh_function_arg (cumulative_args_t, machine_mode,
302 const_tree, bool);
303 static int sh_dwarf_calling_convention (const_tree);
304 static void sh_encode_section_info (tree, rtx, int);
305 static bool sh2a_function_vector_p (tree);
306 static void sh_trampoline_init (rtx, tree, rtx);
307 static rtx sh_trampoline_adjust_address (rtx);
308 static void sh_conditional_register_usage (void);
309 static bool sh_legitimate_constant_p (machine_mode, rtx);
310 static int mov_insn_size (machine_mode, bool);
311 static int mov_insn_alignment_mask (machine_mode, bool);
312 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
313 unsigned int,
314 enum by_pieces_operation,
315 bool);
316 static bool sequence_insn_p (rtx_insn *);
317 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
318 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
319 machine_mode, bool);
320 static bool sh_legitimate_combined_insn (rtx_insn* insn);
322 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
324 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
325 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
326 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
327 static bool sh_modes_tieable_p (machine_mode, machine_mode);
328 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
330 static const struct attribute_spec sh_attribute_table[] =
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
333 affects_type_identity } */
334 { "interrupt_handler", 0, 0, true, false, false,
335 sh_handle_interrupt_handler_attribute, false },
336 { "sp_switch", 1, 1, true, false, false,
337 sh_handle_sp_switch_attribute, false },
338 { "trap_exit", 1, 1, true, false, false,
339 sh_handle_trap_exit_attribute, false },
340 { "renesas", 0, 0, false, true, false,
341 sh_handle_renesas_attribute, false },
342 { "trapa_handler", 0, 0, true, false, false,
343 sh_handle_interrupt_handler_attribute, false },
344 { "nosave_low_regs", 0, 0, true, false, false,
345 sh_handle_interrupt_handler_attribute, false },
346 { "resbank", 0, 0, true, false, false,
347 sh_handle_resbank_handler_attribute, false },
348 { "function_vector", 1, 1, true, false, false,
349 sh2a_handle_function_vector_handler_attribute, false },
350 { NULL, 0, 0, false, false, false, NULL, false }
353 /* Initialize the GCC target structure. */
354 #undef TARGET_ATTRIBUTE_TABLE
355 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
357 /* The next two are used for debug info when compiling with -gdwarf. */
358 #undef TARGET_ASM_UNALIGNED_HI_OP
359 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
360 #undef TARGET_ASM_UNALIGNED_SI_OP
361 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
363 #undef TARGET_OPTION_OVERRIDE
364 #define TARGET_OPTION_OVERRIDE sh_option_override
366 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
367 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
368 sh_override_options_after_change
370 #undef TARGET_PRINT_OPERAND
371 #define TARGET_PRINT_OPERAND sh_print_operand
372 #undef TARGET_PRINT_OPERAND_ADDRESS
373 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
374 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
375 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
376 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
377 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
379 #undef TARGET_ASM_FUNCTION_EPILOGUE
380 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
382 #undef TARGET_ASM_OUTPUT_MI_THUNK
383 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
385 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
386 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
387 hook_bool_const_tree_hwi_hwi_const_tree_true
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START sh_file_start
391 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
392 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
394 #undef TARGET_ASM_INTEGER
395 #define TARGET_ASM_INTEGER sh_assemble_integer
397 #undef TARGET_REGISTER_MOVE_COST
398 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
400 #undef TARGET_INSERT_ATTRIBUTES
401 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
403 #undef TARGET_SCHED_ADJUST_COST
404 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
406 #undef TARGET_SCHED_ISSUE_RATE
407 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
409 /* The next 5 hooks have been implemented for reenabling sched1. With the
410 help of these macros we are limiting the movement of insns in sched1 to
411 reduce the register pressure. The overall idea is to keep count of SImode
412 and SFmode regs required by already scheduled insns. When these counts
413 cross some threshold values; give priority to insns that free registers.
414 The insn that frees registers is most likely to be the insn with lowest
415 LUID (original insn order); but such an insn might be there in the stalled
416 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
417 up to a max of 8 cycles so that such insns may move from Q -> R.
419 The description of the hooks are as below:
421 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
422 scheduler; it is called inside the sched_init function just after
423 find_insn_reg_weights function call. It is used to calculate the SImode
424 and SFmode weights of insns of basic blocks; much similar to what
425 find_insn_reg_weights does.
426 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
428 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
429 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
430 (Q)->(R).
432 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
433 high; reorder the ready queue so that the insn with lowest LUID will be
434 issued next.
436 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
437 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
439 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
440 can be returned from TARGET_SCHED_REORDER2.
442 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
444 #undef TARGET_SCHED_DFA_NEW_CYCLE
445 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
447 #undef TARGET_SCHED_INIT_GLOBAL
448 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
450 #undef TARGET_SCHED_FINISH_GLOBAL
451 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
453 #undef TARGET_SCHED_VARIABLE_ISSUE
454 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
456 #undef TARGET_SCHED_REORDER
457 #define TARGET_SCHED_REORDER sh_reorder
459 #undef TARGET_SCHED_REORDER2
460 #define TARGET_SCHED_REORDER2 sh_reorder2
462 #undef TARGET_SCHED_INIT
463 #define TARGET_SCHED_INIT sh_md_init
465 #undef TARGET_DELEGITIMIZE_ADDRESS
466 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
468 #undef TARGET_LEGITIMIZE_ADDRESS
469 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
471 #undef TARGET_CAN_FOLLOW_JUMP
472 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
474 #undef TARGET_MS_BITFIELD_LAYOUT_P
475 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
477 #undef TARGET_INIT_BUILTINS
478 #define TARGET_INIT_BUILTINS sh_init_builtins
479 #undef TARGET_BUILTIN_DECL
480 #define TARGET_BUILTIN_DECL sh_builtin_decl
481 #undef TARGET_EXPAND_BUILTIN
482 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
484 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
485 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
487 #undef TARGET_CANNOT_COPY_INSN_P
488 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
489 #undef TARGET_RTX_COSTS
490 #define TARGET_RTX_COSTS sh_rtx_costs
491 #undef TARGET_ADDRESS_COST
492 #define TARGET_ADDRESS_COST sh_address_cost
493 #undef TARGET_ALLOCATE_INITIAL_VALUE
494 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
496 #undef TARGET_MACHINE_DEPENDENT_REORG
497 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
499 #undef TARGET_DWARF_REGISTER_SPAN
500 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
502 #ifdef HAVE_AS_TLS
503 #undef TARGET_HAVE_TLS
504 #define TARGET_HAVE_TLS true
505 #endif
507 #undef TARGET_PROMOTE_PROTOTYPES
508 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
509 #undef TARGET_PROMOTE_FUNCTION_MODE
510 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
512 #undef TARGET_FUNCTION_VALUE
513 #define TARGET_FUNCTION_VALUE sh_function_value
514 #undef TARGET_FUNCTION_VALUE_REGNO_P
515 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
516 #undef TARGET_LIBCALL_VALUE
517 #define TARGET_LIBCALL_VALUE sh_libcall_value
518 #undef TARGET_STRUCT_VALUE_RTX
519 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
520 #undef TARGET_RETURN_IN_MEMORY
521 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
523 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
524 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
525 #undef TARGET_SETUP_INCOMING_VARARGS
526 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
527 #undef TARGET_STRICT_ARGUMENT_NAMING
528 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
529 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
530 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
531 #undef TARGET_MUST_PASS_IN_STACK
532 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
535 #undef TARGET_CALLEE_COPIES
536 #define TARGET_CALLEE_COPIES sh_callee_copies
537 #undef TARGET_ARG_PARTIAL_BYTES
538 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
539 #undef TARGET_FUNCTION_ARG
540 #define TARGET_FUNCTION_ARG sh_function_arg
541 #undef TARGET_FUNCTION_ARG_ADVANCE
542 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
544 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
545 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
547 #undef TARGET_BUILD_BUILTIN_VA_LIST
548 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
549 #undef TARGET_EXPAND_BUILTIN_VA_START
550 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
551 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
552 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
554 #undef TARGET_VECTOR_MODE_SUPPORTED_P
555 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
557 #undef TARGET_CHECK_PCH_TARGET_FLAGS
558 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
560 #undef TARGET_DWARF_CALLING_CONVENTION
561 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
563 #undef TARGET_FRAME_POINTER_REQUIRED
564 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
566 #undef TARGET_MODE_EMIT
567 #define TARGET_MODE_EMIT sh_emit_mode_set
569 #undef TARGET_MODE_NEEDED
570 #define TARGET_MODE_NEEDED sh_mode_needed
572 #undef TARGET_MODE_AFTER
573 #define TARGET_MODE_AFTER sh_mode_after
575 #undef TARGET_MODE_ENTRY
576 #define TARGET_MODE_ENTRY sh_mode_entry
578 #undef TARGET_MODE_EXIT
579 #define TARGET_MODE_EXIT sh_mode_exit
581 #undef TARGET_MODE_PRIORITY
582 #define TARGET_MODE_PRIORITY sh_mode_priority
584 /* Return regmode weight for insn. */
585 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
586 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
588 /* Return current register pressure for regmode. */
589 #define CURR_REGMODE_PRESSURE(MODE)\
590 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
592 #undef TARGET_ENCODE_SECTION_INFO
593 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
595 #undef TARGET_LRA_P
596 #define TARGET_LRA_P sh_lra_p
598 #undef TARGET_SECONDARY_RELOAD
599 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
601 #undef TARGET_PREFERRED_RELOAD_CLASS
602 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
604 #undef TARGET_CONDITIONAL_REGISTER_USAGE
605 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
607 #undef TARGET_LEGITIMATE_ADDRESS_P
608 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
610 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
611 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
613 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
614 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
615 sh_legitimize_address_displacement
617 #undef TARGET_TRAMPOLINE_INIT
618 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
619 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
620 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
622 #undef TARGET_LEGITIMATE_CONSTANT_P
623 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
625 #undef TARGET_CANONICALIZE_COMPARISON
626 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
628 #undef TARGET_LEGITIMATE_COMBINED_INSN
629 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
631 #undef TARGET_FIXED_CONDITION_CODE_REGS
632 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
634 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
635 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
636 sh_use_by_pieces_infrastructure_p
638 /* Machine-specific symbol_ref flags. */
639 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
641 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
642 is used by optabs.c atomic op expansion code as well as in sync.md. */
643 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
644 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
646 #undef TARGET_CANNOT_FORCE_CONST_MEM
647 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
649 #undef TARGET_HARD_REGNO_NREGS
650 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
651 #undef TARGET_HARD_REGNO_MODE_OK
652 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
654 #undef TARGET_MODES_TIEABLE_P
655 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
657 #undef TARGET_CAN_CHANGE_MODE_CLASS
658 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
660 struct gcc_target targetm = TARGET_INITIALIZER;
663 /* Information on the currently selected atomic model.
664 This is initialized in sh_option_override. */
665 static sh_atomic_model selected_atomic_model_;
667 const sh_atomic_model&
668 selected_atomic_model (void)
670 return selected_atomic_model_;
673 static sh_atomic_model
674 parse_validate_atomic_model_option (const char* str)
676 const char* model_names[sh_atomic_model::num_models];
677 model_names[sh_atomic_model::none] = "none";
678 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
679 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
680 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
681 model_names[sh_atomic_model::soft_imask] = "soft-imask";
683 const char* model_cdef_names[sh_atomic_model::num_models];
684 model_cdef_names[sh_atomic_model::none] = "NONE";
685 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
686 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
687 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
688 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
690 sh_atomic_model ret;
691 ret.type = sh_atomic_model::none;
692 ret.name = model_names[sh_atomic_model::none];
693 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
694 ret.strict = false;
695 ret.tcb_gbr_offset = -1;
697 /* Handle empty string as 'none'. */
698 if (str == NULL || *str == '\0')
699 return ret;
701 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
703 std::vector<std::string> tokens;
704 for (std::stringstream ss (str); ss.good (); )
706 tokens.push_back (std::string ());
707 std::getline (ss, tokens.back (), ',');
710 if (tokens.empty ())
711 err_ret ("invalid atomic model option");
713 /* The first token must be the atomic model name. */
715 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
716 if (tokens.front () == model_names[i])
718 ret.type = (sh_atomic_model::enum_type)i;
719 ret.name = model_names[i];
720 ret.cdef_name = model_cdef_names[i];
721 goto got_mode_name;
724 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
725 got_mode_name:;
728 /* Go through the remaining tokens. */
729 for (size_t i = 1; i < tokens.size (); ++i)
731 if (tokens[i] == "strict")
732 ret.strict = true;
733 else if (tokens[i].find ("gbr-offset=") == 0)
735 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
736 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
737 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
738 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
739 "option", offset_str.c_str ());
741 else
742 err_ret ("unknown parameter \"%s\" in atomic model option",
743 tokens[i].c_str ());
746 /* Check that the selection makes sense. */
747 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
748 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
749 ret.name);
751 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
752 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
754 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
755 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
757 if (ret.type == sh_atomic_model::soft_tcb
758 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
759 || (ret.tcb_gbr_offset & 3) != 0))
760 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
761 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
762 ret.name);
764 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
765 err_ret ("cannot use atomic model %s in user mode", ret.name);
767 return ret;
769 #undef err_ret
772 /* Register SH specific RTL passes. */
773 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
774 const char* name);
775 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
776 const char* name);
777 static void
778 register_sh_passes (void)
780 /* Running the sh_treg_combine pass after ce1 generates better code when
781 comparisons are combined and reg-reg moves are introduced, because
782 reg-reg moves will be eliminated afterwards. However, there are quite
783 some cases where combine will be unable to fold comparison related insns,
784 thus for now don't do it.
785 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
786 PASS_POS_INSERT_AFTER, "ce1", 1);
789 /* Run sh_treg_combine pass after combine but before register allocation. */
790 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
791 PASS_POS_INSERT_AFTER, "split1", 1);
793 /* Run sh_treg_combine pass after register allocation and basic block
794 reordering as this sometimes creates new opportunities. */
795 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
796 PASS_POS_INSERT_AFTER, "split4", 1);
798 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
799 is known after a conditional branch.
800 This must be done after basic blocks and branch conditions have
801 stabilized and won't be changed by further passes. */
802 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
803 PASS_POS_INSERT_BEFORE, "sched2", 1);
806 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
807 various options, and do some machine dependent initialization. */
808 static void
809 sh_option_override (void)
811 int regno;
813 SUBTARGET_OVERRIDE_OPTIONS;
815 sh_cpu = PROCESSOR_SH1;
816 assembler_dialect = 0;
817 if (TARGET_SH2)
818 sh_cpu = PROCESSOR_SH2;
819 if (TARGET_SH2E)
820 sh_cpu = PROCESSOR_SH2E;
821 if (TARGET_SH2A)
822 sh_cpu = PROCESSOR_SH2A;
823 if (TARGET_SH3)
824 sh_cpu = PROCESSOR_SH3;
825 if (TARGET_SH3E)
826 sh_cpu = PROCESSOR_SH3E;
827 if (TARGET_SH4)
829 assembler_dialect = 1;
830 sh_cpu = PROCESSOR_SH4;
832 if (TARGET_SH4A)
834 assembler_dialect = 1;
835 sh_cpu = PROCESSOR_SH4A;
838 /* User/priviledged mode is supported only on SH3* and SH4*.
839 Disable it for everything else. */
840 if (!TARGET_SH3 && TARGET_USERMODE)
841 TARGET_USERMODE = false;
843 if (! strcmp (sh_div_str, "call-div1"))
844 sh_div_strategy = SH_DIV_CALL_DIV1;
845 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
846 sh_div_strategy = SH_DIV_CALL_FP;
847 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
848 sh_div_strategy = SH_DIV_CALL_TABLE;
849 else
851 /* Pick one that makes most sense for the target in general.
852 It is not much good to use different functions depending on -Os,
853 since then we'll end up with two different functions when some of
854 the code is compiled for size, and some for speed. */
856 /* SH4 tends to emphasize speed. */
857 if (TARGET_HARD_SH4)
858 sh_div_strategy = SH_DIV_CALL_TABLE;
859 /* These have their own way of doing things. */
860 else if (TARGET_SH2A)
861 sh_div_strategy = SH_DIV_INTRINSIC;
862 /* SH1 .. SH3 cores often go into small-footprint systems, so
863 default to the smallest implementation available. */
864 else
865 sh_div_strategy = SH_DIV_CALL_DIV1;
868 if (sh_divsi3_libfunc[0])
869 ; /* User supplied - leave it alone. */
870 else if (TARGET_DIVIDE_CALL_FP)
871 sh_divsi3_libfunc = "__sdivsi3_i4";
872 else if (TARGET_DIVIDE_CALL_TABLE)
873 sh_divsi3_libfunc = "__sdivsi3_i4i";
874 else
875 sh_divsi3_libfunc = "__sdivsi3";
877 if (sh_branch_cost == -1)
879 /* The SH1 does not have delay slots, hence we get a pipeline stall
880 at every branch. The SH4 is superscalar, so the single delay slot
881 is not sufficient to keep both pipelines filled.
882 In any case, set the default branch cost to '2', as it results in
883 slightly overall smaller code and also enables some if conversions
884 that are required for matching special T bit related insns. */
885 sh_branch_cost = 2;
888 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
889 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
890 TARGET_ZDCBRANCH = 1;
892 /* FDPIC code is a special form of PIC, and the vast majority of code
893 generation constraints that apply to PIC also apply to FDPIC, so we
894 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
895 flag_pic is checked. */
896 if (TARGET_FDPIC && !flag_pic)
897 flag_pic = 2;
899 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
900 if (! VALID_REGISTER_P (regno))
901 sh_register_names[regno][0] = '\0';
903 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
904 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
905 sh_additional_register_names[regno][0] = '\0';
907 if (flag_pic && ! TARGET_PREFERGOT)
908 flag_no_function_cse = 1;
910 if (targetm.small_register_classes_for_mode_p (VOIDmode))
912 /* Never run scheduling before reload, since that can
913 break global alloc, and generates slower code anyway due
914 to the pressure on R0. */
915 /* Enable sched1 for SH4 if the user explicitly requests.
916 When sched1 is enabled, the ready queue will be reordered by
917 the target hooks if pressure is high. We can not do this for
918 PIC, SH3 and lower as they give spill failures for R0. */
919 if (!TARGET_HARD_SH4 || flag_pic)
920 flag_schedule_insns = 0;
921 /* ??? Current exception handling places basic block boundaries
922 after call_insns. It causes the high pressure on R0 and gives
923 spill failures for R0 in reload. See PR 22553 and the thread
924 on gcc-patches
925 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
926 else if (flag_exceptions)
928 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
929 warning (0, "ignoring -fschedule-insns because of exception "
930 "handling bug");
931 flag_schedule_insns = 0;
933 else if (flag_schedule_insns
934 && !global_options_set.x_flag_schedule_insns)
935 flag_schedule_insns = 0;
938 /* Unwind info is not correct around the CFG unless either a frame
939 pointer is present or M_A_O_A is set. Fixing this requires rewriting
940 unwind info generation to be aware of the CFG and propagating states
941 around edges. */
942 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
943 || flag_exceptions || flag_non_call_exceptions)
944 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
946 warning (0, "unwind tables currently require either a frame pointer "
947 "or -maccumulate-outgoing-args for correctness");
948 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
951 if (flag_unsafe_math_optimizations)
953 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
954 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
955 TARGET_FSCA = 1;
957 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
958 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
959 TARGET_FSRRA = 1;
962 /* Allow fsrra insn only if -funsafe-math-optimizations and
963 -ffinite-math-only is enabled. */
964 TARGET_FSRRA = TARGET_FSRRA
965 && flag_unsafe_math_optimizations
966 && flag_finite_math_only;
968 /* If the -mieee option was not explicitly set by the user, turn it on
969 unless -ffinite-math-only was specified. See also PR 33135. */
970 if (! global_options_set.x_TARGET_IEEE)
971 TARGET_IEEE = ! flag_finite_math_only;
973 if (sh_fixed_range_str)
974 sh_fix_range (sh_fixed_range_str);
976 /* This target defaults to strict volatile bitfields. */
977 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
978 flag_strict_volatile_bitfields = 1;
980 sh_override_options_after_change ();
982 /* Parse atomic model option and make sure it is valid for the current
983 target CPU. */
984 selected_atomic_model_
985 = parse_validate_atomic_model_option (sh_atomic_model_str);
987 register_sh_passes ();
990 /* Implement targetm.override_options_after_change. */
992 static void
993 sh_override_options_after_change (void)
995 /* Adjust loop, jump and function alignment values (in bytes), if those
996 were not specified by the user using -falign-loops, -falign-jumps
997 and -falign-functions options.
998 32 bit alignment is better for speed, because instructions can be
999 fetched as a pair from a longword boundary. For size use 16 bit
1000 alignment to get more compact code.
1001 Aligning all jumps increases the code size, even if it might
1002 result in slightly faster code. Thus, it is set to the smallest
1003 alignment possible if not specified by the user. */
1004 if (align_loops == 0)
1005 align_loops = optimize_size ? 2 : 4;
1007 if (align_jumps == 0)
1008 align_jumps = 2;
1009 else if (align_jumps < 2)
1010 align_jumps = 2;
1012 if (align_functions == 0)
1013 align_functions = optimize_size ? 2 : 4;
1015 /* The linker relaxation code breaks when a function contains
1016 alignments that are larger than that at the start of a
1017 compilation unit. */
1018 if (TARGET_RELAX)
1020 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1022 /* Also take possible .long constants / mova tables into account. */
1023 if (min_align < 4)
1024 min_align = 4;
1025 if (align_functions < min_align)
1026 align_functions = min_align;
1030 /* Print the operand address in x to the stream. */
1031 static void
1032 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1034 switch (GET_CODE (x))
1036 case REG:
1037 case SUBREG:
1038 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1039 break;
1041 case PLUS:
1043 rtx base = XEXP (x, 0);
1044 rtx index = XEXP (x, 1);
1046 switch (GET_CODE (index))
1048 case CONST_INT:
1049 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1050 reg_names[true_regnum (base)]);
1051 break;
1053 case REG:
1054 case SUBREG:
1056 int base_num = true_regnum (base);
1057 int index_num = true_regnum (index);
1059 /* If base or index is R0, make sure that it comes first.
1060 Usually one of them will be R0, but the order might be wrong.
1061 If neither base nor index are R0 it's an error and we just
1062 pass it on to the assembler. This avoids silent wrong code
1063 bugs. */
1064 if (base_num == 0 && index_num != 0)
1065 std::swap (base_num, index_num);
1067 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1068 reg_names[base_num]);
1069 break;
1072 default:
1073 gcc_unreachable ();
1076 break;
1078 case PRE_DEC:
1079 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1080 break;
1082 case POST_INC:
1083 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1084 break;
1086 default:
1087 x = mark_constant_pool_use (x);
1088 output_addr_const (stream, x);
1089 break;
1093 /* Print operand x (an rtx) in assembler syntax to file stream
1094 according to modifier code.
1096 '.' print a .s if insn needs delay slot
1097 ',' print LOCAL_LABEL_PREFIX
1098 '@' print trap, rte or rts depending upon pragma interruptness
1099 '#' output a nop if there is nothing to put in the delay slot
1100 ''' print likelihood suffix (/u for unlikely).
1101 '>' print branch target if -fverbose-asm
1102 'O' print a constant without the #
1103 'R' print the LSW of a dp value - changes if in little endian
1104 'S' print the MSW of a dp value - changes if in little endian
1105 'T' print the next word of a dp value - same as 'R' in big endian mode.
1106 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1107 'N' print 'r63' if the operand is (const_int 0).
1108 'd' print a V2SF reg as dN instead of fpN.
1109 'm' print a pair `base,offset' or `base,index', for LD and ST.
1110 'U' Likewise for {LD,ST}{HI,LO}.
1111 'V' print the position of a single bit set.
1112 'W' print the position of a single bit cleared.
1113 't' print a memory address which is a register.
1114 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1115 'o' output an operator. */
1116 static void
1117 sh_print_operand (FILE *stream, rtx x, int code)
1119 int regno;
1120 machine_mode mode;
1122 switch (code)
1124 tree trapa_attr;
1126 case '.':
1127 if (final_sequence
1128 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1129 && get_attr_length (final_sequence->insn (1)))
1130 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1131 break;
1132 case ',':
1133 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1134 break;
1135 case '@':
1136 trapa_attr = lookup_attribute ("trap_exit",
1137 DECL_ATTRIBUTES (current_function_decl));
1138 if (trapa_attr)
1139 fprintf (stream, "trapa #%ld",
1140 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1141 else if (sh_cfun_interrupt_handler_p ())
1143 if (sh_cfun_resbank_handler_p ())
1144 fprintf (stream, "resbank\n");
1145 fprintf (stream, "rte");
1147 else
1148 fprintf (stream, "rts");
1149 break;
1150 case '#':
1151 /* Output a nop if there's nothing in the delay slot. */
1152 if (dbr_sequence_length () == 0)
1153 fprintf (stream, "\n\tnop");
1154 break;
1155 case '\'':
1157 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1159 if (note
1160 && profile_probability::from_reg_br_prob_note (XINT (note, 0))
1161 < profile_probability::even ())
1162 fputs ("/u", stream);
1163 break;
1165 case '>':
1166 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1168 fputs ("\t! target: ", stream);
1169 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1171 break;
1172 case 'O':
1173 x = mark_constant_pool_use (x);
1174 output_addr_const (stream, x);
1175 break;
1176 /* N.B.: %R / %S / %T adjust memory addresses by four.
1177 While they can be used to access 64 bit parts of a larger value
1178 held in general purpose registers, that won't work with memory -
1179 neither for fp registers, since the frxx names are used. */
1180 case 'R':
1181 if (REG_P (x) || GET_CODE (x) == SUBREG)
1183 regno = true_regnum (x);
1184 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1185 fputs (reg_names[regno], (stream));
1187 else if (MEM_P (x))
1189 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1190 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1192 else
1194 rtx sub = NULL_RTX;
1196 mode = GET_MODE (x);
1197 if (mode == VOIDmode)
1198 mode = DImode;
1199 if (GET_MODE_SIZE (mode) >= 8)
1200 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1201 if (sub)
1202 sh_print_operand (stream, sub, 0);
1203 else
1204 output_operand_lossage ("invalid operand to %%R");
1206 break;
1207 case 'S':
1208 if (REG_P (x) || GET_CODE (x) == SUBREG)
1210 regno = true_regnum (x);
1211 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1212 fputs (reg_names[regno], (stream));
1214 else if (MEM_P (x))
1216 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1217 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1219 else
1221 rtx sub = NULL_RTX;
1223 mode = GET_MODE (x);
1224 if (mode == VOIDmode)
1225 mode = DImode;
1226 if (GET_MODE_SIZE (mode) >= 8)
1227 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1228 if (sub)
1229 sh_print_operand (stream, sub, 0);
1230 else
1231 output_operand_lossage ("invalid operand to %%S");
1233 break;
1234 case 'T':
1235 /* Next word of a double. */
1236 switch (GET_CODE (x))
1238 case REG:
1239 fputs (reg_names[REGNO (x) + 1], (stream));
1240 break;
1241 case MEM:
1243 machine_mode mode = GET_MODE (x);
1244 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1245 && GET_CODE (XEXP (x, 0)) != POST_INC)
1246 x = adjust_address (x, SImode, 4);
1247 sh_print_operand_address (stream, mode, XEXP (x, 0));
1249 break;
1250 default:
1251 break;
1253 break;
1255 case 't':
1256 gcc_assert (MEM_P (x));
1257 x = XEXP (x, 0);
1258 switch (GET_CODE (x))
1260 case REG:
1261 case SUBREG:
1262 sh_print_operand (stream, x, 0);
1263 break;
1264 default:
1265 break;
1267 break;
1269 case 'o':
1270 switch (GET_CODE (x))
1272 case PLUS: fputs ("add", stream); break;
1273 case MINUS: fputs ("sub", stream); break;
1274 case MULT: fputs ("mul", stream); break;
1275 case DIV: fputs ("div", stream); break;
1276 case EQ: fputs ("eq", stream); break;
1277 case NE: fputs ("ne", stream); break;
1278 case GT: case LT: fputs ("gt", stream); break;
1279 case GE: case LE: fputs ("ge", stream); break;
1280 case GTU: case LTU: fputs ("gtu", stream); break;
1281 case GEU: case LEU: fputs ("geu", stream); break;
1282 default:
1283 break;
1285 break;
1286 case 'M':
1287 if (MEM_P (x))
1289 switch (GET_MODE (x))
1291 case E_QImode: fputs (".b", stream); break;
1292 case E_HImode: fputs (".w", stream); break;
1293 case E_SImode: fputs (".l", stream); break;
1294 case E_SFmode: fputs (".s", stream); break;
1295 case E_DFmode: fputs (".d", stream); break;
1296 default: gcc_unreachable ();
1299 break;
1301 case 'm':
1302 gcc_assert (MEM_P (x));
1303 x = XEXP (x, 0);
1304 /* Fall through. */
1305 case 'U':
1306 switch (GET_CODE (x))
1308 case REG:
1309 case SUBREG:
1310 sh_print_operand (stream, x, 0);
1311 fputs (", 0", stream);
1312 break;
1314 case PLUS:
1315 sh_print_operand (stream, XEXP (x, 0), 0);
1316 fputs (", ", stream);
1317 sh_print_operand (stream, XEXP (x, 1), 0);
1318 break;
1320 default:
1321 gcc_unreachable ();
1323 break;
1325 case 'V':
1327 int num = exact_log2 (INTVAL (x));
1328 gcc_assert (num >= 0);
1329 fprintf (stream, "#%d", num);
1331 break;
1333 case 'W':
1335 int num = exact_log2 (~INTVAL (x));
1336 gcc_assert (num >= 0);
1337 fprintf (stream, "#%d", num);
1339 break;
1341 case 'd':
1342 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1344 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1345 break;
1347 case 'N':
1348 if (x == CONST0_RTX (GET_MODE (x)))
1350 fprintf ((stream), "r63");
1351 break;
1353 goto default_output;
1354 case 'u':
1355 if (CONST_INT_P (x))
1357 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1358 break;
1360 /* Fall through. */
1362 default_output:
1363 default:
1364 regno = 0;
1365 mode = GET_MODE (x);
1367 switch (GET_CODE (x))
1369 case TRUNCATE:
1371 rtx inner = XEXP (x, 0);
1372 int offset = 0;
1373 machine_mode inner_mode;
1375 /* We might see SUBREGs with vector mode registers inside. */
1376 if (GET_CODE (inner) == SUBREG
1377 && (GET_MODE_SIZE (GET_MODE (inner))
1378 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1379 && subreg_lowpart_p (inner))
1380 inner = SUBREG_REG (inner);
1381 if (CONST_INT_P (inner))
1383 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1384 goto default_output;
1386 inner_mode = GET_MODE (inner);
1387 if (GET_CODE (inner) == SUBREG
1388 && (GET_MODE_SIZE (GET_MODE (inner))
1389 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1390 && REG_P (SUBREG_REG (inner)))
1392 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1393 GET_MODE (SUBREG_REG (inner)),
1394 SUBREG_BYTE (inner),
1395 GET_MODE (inner));
1396 inner = SUBREG_REG (inner);
1398 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1399 abort ();
1400 /* Floating point register pairs are always big endian;
1401 general purpose registers are 64 bit wide. */
1402 regno = REGNO (inner);
1403 regno = (hard_regno_nregs (regno, inner_mode)
1404 - hard_regno_nregs (regno, mode))
1405 + offset;
1406 x = inner;
1407 goto reg;
1409 case SIGN_EXTEND:
1410 x = XEXP (x, 0);
1411 goto reg;
1412 case SUBREG:
1413 gcc_assert (SUBREG_BYTE (x) == 0
1414 && REG_P (SUBREG_REG (x)));
1416 x = SUBREG_REG (x);
1417 /* Fall through. */
1419 reg:
1420 case REG:
1421 regno += REGNO (x);
1422 if (FP_REGISTER_P (regno)
1423 && mode == V16SFmode)
1424 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1425 else if (FP_REGISTER_P (REGNO (x))
1426 && mode == V4SFmode)
1427 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1428 else if (REG_P (x)
1429 && mode == V2SFmode)
1430 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1431 else if (FP_REGISTER_P (REGNO (x))
1432 && GET_MODE_SIZE (mode) > 4)
1433 fprintf ((stream), "d%s", reg_names[regno] + 1);
1434 else
1435 fputs (reg_names[regno], (stream));
1436 break;
1438 case MEM:
1439 output_address (GET_MODE (x), XEXP (x, 0));
1440 break;
1442 default:
1443 fputc ('#', stream);
1444 output_addr_const (stream, x);
1445 break;
1447 break;
1451 static bool
1452 sh_print_operand_punct_valid_p (unsigned char code)
1454 return (code == '.' || code == '#' || code == '@' || code == ','
1455 || code == '$' || code == '\'' || code == '>');
1458 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1459 static bool
1460 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1462 if (GET_CODE (x) == UNSPEC)
1464 switch (XINT (x, 1))
1466 case UNSPEC_PIC:
1467 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1468 output_addr_const (file, XVECEXP (x, 0, 0));
1469 break;
1470 case UNSPEC_GOT:
1471 output_addr_const (file, XVECEXP (x, 0, 0));
1472 fputs ("@GOT", file);
1473 break;
1474 case UNSPEC_GOTOFF:
1475 output_addr_const (file, XVECEXP (x, 0, 0));
1476 fputs ("@GOTOFF", file);
1477 break;
1478 case UNSPEC_PLT:
1479 output_addr_const (file, XVECEXP (x, 0, 0));
1480 fputs ("@PLT", file);
1481 break;
1482 case UNSPEC_GOTPLT:
1483 output_addr_const (file, XVECEXP (x, 0, 0));
1484 fputs ("@GOTPLT", file);
1485 break;
1486 case UNSPEC_PCREL:
1487 output_addr_const (file, XVECEXP (x, 0, 0));
1488 fputs ("@PCREL", file);
1489 break;
1490 case UNSPEC_DTPOFF:
1491 output_addr_const (file, XVECEXP (x, 0, 0));
1492 fputs ("@DTPOFF", file);
1493 break;
1494 case UNSPEC_GOTTPOFF:
1495 output_addr_const (file, XVECEXP (x, 0, 0));
1496 fputs ("@GOTTPOFF", file);
1497 break;
1498 case UNSPEC_TPOFF:
1499 output_addr_const (file, XVECEXP (x, 0, 0));
1500 fputs ("@TPOFF", file);
1501 break;
1502 case UNSPEC_CALLER:
1504 char name[32];
1505 /* LPCS stands for Label for PIC Call Site. */
1506 targetm.asm_out.generate_internal_label (name, "LPCS",
1507 INTVAL (XVECEXP (x, 0, 0)));
1508 assemble_name (file, name);
1510 break;
1511 case UNSPEC_SYMOFF:
1512 output_addr_const (file, XVECEXP (x, 0, 0));
1513 fputc ('-', file);
1514 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1516 fputc ('(', file);
1517 output_addr_const (file, XVECEXP (x, 0, 1));
1518 fputc (')', file);
1520 else
1521 output_addr_const (file, XVECEXP (x, 0, 1));
1522 break;
1523 case UNSPEC_PCREL_SYMOFF:
1524 output_addr_const (file, XVECEXP (x, 0, 0));
1525 fputs ("-(", file);
1526 output_addr_const (file, XVECEXP (x, 0, 1));
1527 fputs ("-.)", file);
1528 break;
1529 case UNSPEC_GOTFUNCDESC:
1530 output_addr_const (file, XVECEXP (x, 0, 0));
1531 fputs ("@GOTFUNCDESC", file);
1532 break;
1533 case UNSPEC_GOTOFFFUNCDESC:
1534 output_addr_const (file, XVECEXP (x, 0, 0));
1535 fputs ("@GOTOFFFUNCDESC", file);
1536 break;
1537 default:
1538 return false;
1540 return true;
1542 else
1543 return false;
1546 /* Encode symbol attributes of a SYMBOL_REF into its
1547 SYMBOL_REF_FLAGS. */
1548 static void
1549 sh_encode_section_info (tree decl, rtx rtl, int first)
1551 default_encode_section_info (decl, rtl, first);
1553 if (TREE_CODE (decl) == FUNCTION_DECL
1554 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1555 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1558 /* Prepare operands for a move define_expand; specifically, one of the
1559 operands must be in a register. */
1560 void
1561 prepare_move_operands (rtx operands[], machine_mode mode)
1563 if ((mode == SImode || mode == DImode)
1564 && flag_pic
1565 && ! ((mode == Pmode || mode == ptr_mode)
1566 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1568 rtx temp;
1569 if (SYMBOLIC_CONST_P (operands[1]))
1571 if (MEM_P (operands[0]))
1572 operands[1] = force_reg (Pmode, operands[1]);
1573 else
1575 temp = (!can_create_pseudo_p ()
1576 ? operands[0]
1577 : gen_reg_rtx (Pmode));
1578 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1581 else if (GET_CODE (operands[1]) == CONST
1582 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1583 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1585 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1586 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1587 mode, temp);
1588 operands[1] = expand_binop (mode, add_optab, temp,
1589 XEXP (XEXP (operands[1], 0), 1),
1590 (!can_create_pseudo_p ()
1591 ? temp
1592 : gen_reg_rtx (Pmode)),
1593 0, OPTAB_LIB_WIDEN);
1597 if (! reload_in_progress && ! reload_completed)
1599 /* Copy the source to a register if both operands aren't registers. */
1600 if (! register_operand (operands[0], mode)
1601 && ! register_operand (operands[1], mode))
1602 operands[1] = copy_to_mode_reg (mode, operands[1]);
1604 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1606 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1607 except that we can't use that function because it is static. */
1608 rtx new_rtx = change_address (operands[0], mode, 0);
1609 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1610 operands[0] = new_rtx;
1613 /* This case can happen while generating code to move the result
1614 of a library call to the target. Reject `st r0,@(rX,rY)' because
1615 reload will fail to find a spill register for rX, since r0 is already
1616 being used for the source. */
1617 else if (refers_to_regno_p (R0_REG, operands[1])
1618 && MEM_P (operands[0])
1619 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1620 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1621 operands[1] = copy_to_mode_reg (mode, operands[1]);
1623 /* When the displacement addressing is used, RA will assign r0 to
1624 the pseudo register operand for the QI/HImode load/store.
1625 This tends to make a long live range for R0 and might cause
1626 anomalous register spills in some case with LRA. See PR
1627 target/55212.
1628 We split possible load/store to two move insns via r0 so as to
1629 shorten R0 live range. It will make some codes worse but will
1630 win on average for LRA.
1631 Also when base+index addressing is used and the index term is
1632 a subreg, LRA assumes that more hard registers can be available
1633 in some situation. It isn't the case for SH in the problematic
1634 case. We can pre-allocate R0 for that index term to avoid
1635 the issue. See PR target/66591. */
1636 else if (sh_lra_p ()
1637 && ! TARGET_SH2A
1638 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1639 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1641 bool load_p = REG_P (operands[0]);
1642 rtx reg = operands[load_p ? 0 : 1];
1643 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1645 if ((mode == QImode || mode == HImode)
1646 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1647 && GET_CODE (adr) == PLUS
1648 && REG_P (XEXP (adr, 0))
1649 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1650 && CONST_INT_P (XEXP (adr, 1))
1651 && INTVAL (XEXP (adr, 1)) != 0
1652 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1654 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1655 emit_move_insn (r0_rtx, operands[1]);
1656 operands[1] = r0_rtx;
1658 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1659 && GET_CODE (adr) == PLUS
1660 && REG_P (XEXP (adr, 0))
1661 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1662 && SUBREG_P (XEXP (adr, 1))
1663 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1665 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1666 emit_move_insn (r0_rtx, XEXP (adr, 1));
1667 XEXP (adr, 1) = r0_rtx;
1672 if (mode == Pmode || mode == ptr_mode)
1674 rtx op0 = operands[0];
1675 rtx op1 = operands[1];
1676 rtx opc;
1677 if (GET_CODE (op1) == CONST
1678 && GET_CODE (XEXP (op1, 0)) == PLUS
1679 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1680 != TLS_MODEL_NONE))
1682 opc = XEXP (XEXP (op1, 0), 1);
1683 op1 = XEXP (XEXP (op1, 0), 0);
1685 else
1686 opc = NULL_RTX;
1688 enum tls_model tls_kind;
1690 if (! reload_in_progress && ! reload_completed
1691 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1693 rtx tga_op1, tga_ret, tmp, tmp2;
1695 if (! flag_pic
1696 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1697 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1698 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1700 static int got_labelno;
1701 /* Don't schedule insns for getting GOT address when
1702 the first scheduling is enabled, to avoid spill
1703 failures for R0. */
1704 if (flag_schedule_insns)
1705 emit_insn (gen_blockage ());
1706 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1707 emit_use (gen_rtx_REG (SImode, PIC_REG));
1708 if (flag_schedule_insns)
1709 emit_insn (gen_blockage ());
1712 switch (tls_kind)
1714 case TLS_MODEL_GLOBAL_DYNAMIC:
1715 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1716 if (TARGET_FDPIC)
1717 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1718 sh_get_fdpic_reg_initial_val ());
1719 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1720 tmp = gen_reg_rtx (Pmode);
1721 emit_move_insn (tmp, tga_ret);
1722 op1 = tmp;
1723 break;
1725 case TLS_MODEL_LOCAL_DYNAMIC:
1726 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1727 if (TARGET_FDPIC)
1728 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1729 sh_get_fdpic_reg_initial_val ());
1730 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1732 tmp = gen_reg_rtx (Pmode);
1733 emit_move_insn (tmp, tga_ret);
1735 if (register_operand (op0, Pmode))
1736 tmp2 = op0;
1737 else
1738 tmp2 = gen_reg_rtx (Pmode);
1740 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1741 op1 = tmp2;
1742 break;
1744 case TLS_MODEL_INITIAL_EXEC:
1745 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1746 tmp = gen_sym2GOTTPOFF (op1);
1747 if (TARGET_FDPIC)
1748 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1749 sh_get_fdpic_reg_initial_val ());
1750 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1751 op1 = tga_op1;
1752 break;
1754 case TLS_MODEL_LOCAL_EXEC:
1755 tmp2 = gen_reg_rtx (Pmode);
1756 emit_insn (gen_store_gbr (tmp2));
1757 tmp = gen_reg_rtx (Pmode);
1758 emit_insn (gen_symTPOFF2reg (tmp, op1));
1760 if (register_operand (op0, Pmode))
1761 op1 = op0;
1762 else
1763 op1 = gen_reg_rtx (Pmode);
1765 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1766 break;
1768 default:
1769 gcc_unreachable ();
1771 if (opc)
1772 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1773 operands[1] = op1;
1777 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1779 rtx base, offset;
1780 split_const (operands[1], &base, &offset);
1782 if (GET_CODE (base) == SYMBOL_REF
1783 && !offset_within_block_p (base, INTVAL (offset)))
1785 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1786 emit_move_insn (tmp, base);
1787 if (!arith_operand (offset, mode))
1788 offset = force_reg (mode, offset);
1789 emit_insn (gen_add3_insn (operands[0], tmp, offset));
1794 /* Implement the canonicalize_comparison target hook for the combine
1795 pass. For the target hook this function is invoked via
1796 sh_canonicalize_comparison. This function is also re-used to
1797 canonicalize comparisons in cbranch pattern expanders. */
1798 static void
1799 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1800 machine_mode mode,
1801 bool op0_preserve_value)
1803 /* When invoked from within the combine pass the mode is not specified,
1804 so try to get it from one of the operands. */
1805 if (mode == VOIDmode)
1806 mode = GET_MODE (op0);
1807 if (mode == VOIDmode)
1808 mode = GET_MODE (op1);
1810 // We need to have a mode to do something useful here.
1811 if (mode == VOIDmode)
1812 return;
1814 // Currently, we don't deal with floats here.
1815 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1816 return;
1818 // Make sure that the constant operand is the second operand.
1819 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1821 if (op0_preserve_value)
1822 return;
1824 std::swap (op0, op1);
1825 cmp = swap_condition (cmp);
1828 if (CONST_INT_P (op1))
1830 /* Try to adjust the constant operand in such a way that available
1831 comparison insns can be utilized better and the constant can be
1832 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1833 constant pool. */
1834 const HOST_WIDE_INT val = INTVAL (op1);
1836 /* x > -1 --> x >= 0
1837 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1838 x <= -1 --> x < 0
1839 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1840 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1842 cmp = cmp == GT ? GE : LT;
1843 op1 = gen_int_mode (val + 1, mode);
1846 /* x >= 1 --> x > 0
1847 x >= 0x80 --> x > 0x7F
1848 x < 1 --> x <= 0
1849 x < 0x80 --> x <= 0x7F */
1850 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1852 cmp = cmp == GE ? GT : LE;
1853 op1 = gen_int_mode (val - 1, mode);
1856 /* unsigned x >= 1 --> x != 0
1857 unsigned x < 1 --> x == 0 */
1858 else if (val == 1 && (cmp == GEU || cmp == LTU))
1860 cmp = cmp == GEU ? NE : EQ;
1861 op1 = CONST0_RTX (mode);
1864 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1865 unsigned x < 0x80 --> unsigned x < 0x7F */
1866 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1868 cmp = cmp == GEU ? GTU : LEU;
1869 op1 = gen_int_mode (val - 1, mode);
1872 /* unsigned x > 0 --> x != 0
1873 unsigned x <= 0 --> x == 0 */
1874 else if (val == 0 && (cmp == GTU || cmp == LEU))
1875 cmp = cmp == GTU ? NE : EQ;
1877 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1878 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1879 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1880 && val == 0x7FFFFFFF)
1882 cmp = cmp == GTU ? LT : GE;
1883 op1 = const0_rtx;
1886 /* unsigned x >= 0x80000000 --> signed x < 0
1887 unsigned x < 0x80000000 --> signed x >= 0 */
1888 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1889 && (unsigned HOST_WIDE_INT)val
1890 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1892 cmp = cmp == GEU ? LT : GE;
1893 op1 = const0_rtx;
1898 /* This function implements the canonicalize_comparison target hook.
1899 This wrapper around the internally used sh_canonicalize_comparison
1900 function is needed to do the enum rtx_code <-> int conversion.
1901 Target hooks cannot use enum rtx_code in its definition. */
1902 static void
1903 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1904 bool op0_preserve_value)
1906 enum rtx_code tmp_code = (enum rtx_code)*code;
1907 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1908 VOIDmode, op0_preserve_value);
1909 *code = (int)tmp_code;
1912 /* This function implements the legitimate_combined_insn target hook,
1913 which the combine pass uses to early reject combined insns, before
1914 it tries to recog the insn and determine its cost. */
1915 static bool
1916 sh_legitimate_combined_insn (rtx_insn* insn)
1918 /* Reject combinations of memory loads and zero extensions, as these
1919 interfere with other combine patterns such as zero extracts and bit
1920 tests. The SH2A movu.{b|w} insns are formed later in the
1921 'sh_optimize_extu_exts' pass after combine/split1. */
1922 rtx p = PATTERN (insn);
1923 if (GET_CODE (p) == SET
1924 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1925 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1926 && MEM_P (XEXP (XEXP (p, 1), 0)))
1927 return false;
1929 return true;
1932 bool
1933 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1935 *p1 = T_REG;
1936 *p2 = INVALID_REGNUM;
1937 return true;
1940 /* Try to calculate the branch distance of a conditional branch in bytes.
1942 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1943 walk from this insn into the next (fall-through) basic block and see if
1944 we hit the label. */
1945 unsigned int
1946 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1948 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1950 if (dump_file)
1952 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1953 print_rtl_single (dump_file, cbranch_insn);
1956 unsigned int dist = 0;
1958 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1959 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1961 const unsigned int i_len = get_attr_length (i);
1962 dist += i_len;
1964 if (dump_file)
1965 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1966 INSN_UID (i), i_len, dist);
1968 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1970 if (l == cbranch_insn->jump_target ())
1972 if (dump_file)
1973 fprintf (dump_file, " cbranch dist = %u\n", dist);
1974 return dist;
1976 break;
1980 if (dump_file)
1981 fprintf (dump_file, " cbranch dist = unknown\n");
1983 return unknown_cbranch_distance;
1986 enum rtx_code
1987 prepare_cbranch_operands (rtx *operands, machine_mode mode,
1988 enum rtx_code comparison)
1990 gcc_assert (can_create_pseudo_p ());
1992 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1993 comparison = GET_CODE (operands[0]);
1995 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1996 mode, false);
1998 rtx op1 = operands[1];
1999 operands[1] = force_reg (mode, op1);
2001 /* When we are handling DImode comparisons, we want to keep constants so
2002 that we can optimize the component comparisons; however, memory loads
2003 are better issued as a whole so that they can be scheduled well.
2004 SImode equality comparisons allow I08 constants, but only when they
2005 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2006 into a register, that register might as well be r0, and we allow the
2007 constant. If it is already in a register, this is likely to be
2008 allocated to a different hard register, thus we load the constant into
2009 a register unless it is zero. */
2010 if (!REG_P (operands[2])
2011 && (!CONST_INT_P (operands[2])
2012 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2013 && ((comparison != EQ && comparison != NE)
2014 || (REG_P (op1) && REGNO (op1) != R0_REG)
2015 || !satisfies_constraint_I08 (operands[2])))))
2016 operands[2] = force_reg (mode, operands[2]);
2018 return comparison;
2021 static void
2022 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2023 profile_probability probability)
2025 rtx (*branch_expander) (rtx) = gen_branch_true;
2026 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2027 switch (comparison)
2029 case NE: case LT: case LE: case LTU: case LEU:
2030 comparison = reverse_condition (comparison);
2031 branch_expander = gen_branch_false;
2032 default: ;
2034 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2035 gen_rtx_fmt_ee (comparison, SImode,
2036 operands[1], operands[2])));
2037 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2038 if (probability.initialized_p ())
2039 add_reg_br_prob_note (jump, probability);
2042 void
2043 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2045 expand_cbranchsi4 (operands, comparison,
2046 profile_probability::uninitialized ());
2049 /* ??? How should we distribute probabilities when more than one branch
2050 is generated. So far we only have some ad-hoc observations:
2051 - If the operands are random, they are likely to differ in both parts.
2052 - If comparing items in a hash chain, the operands are random or equal;
2053 operation should be EQ or NE.
2054 - If items are searched in an ordered tree from the root, we can expect
2055 the highpart to be unequal about half of the time; operation should be
2056 an inequality comparison, operands non-constant, and overall probability
2057 about 50%. Likewise for quicksort.
2058 - Range checks will be often made against constants. Even if we assume for
2059 simplicity an even distribution of the non-constant operand over a
2060 sub-range here, the same probability could be generated with differently
2061 wide sub-ranges - as long as the ratio of the part of the subrange that
2062 is before the threshold to the part that comes after the threshold stays
2063 the same. Thus, we can't really tell anything here;
2064 assuming random distribution is at least simple.
2066 bool
2067 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2069 enum rtx_code msw_taken, msw_skip, lsw_taken;
2070 rtx_code_label *skip_label = NULL;
2071 rtx op1h, op1l, op2h, op2l;
2072 int num_branches;
2073 profile_probability prob, rev_prob;
2074 profile_probability msw_taken_prob = profile_probability::uninitialized (),
2075 msw_skip_prob = profile_probability::uninitialized (),
2076 lsw_taken_prob = profile_probability::uninitialized ();
2078 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2079 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2080 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2081 op1l = gen_lowpart (SImode, operands[1]);
2082 op2l = gen_lowpart (SImode, operands[2]);
2083 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2084 prob = split_branch_probability;
2085 rev_prob = prob.invert ();
2086 switch (comparison)
2088 case EQ:
2089 msw_skip = NE;
2090 lsw_taken = EQ;
2091 if (prob.initialized_p ())
2093 /* FIXME: This is not optimal. We do not really know the probablity
2094 that values differ by MCW only, but we should probably distribute
2095 probabilities more evenly. */
2096 msw_skip_prob = rev_prob;
2097 lsw_taken_prob = prob > profile_probability::never ()
2098 ? profile_probability::guessed_always ()
2099 : profile_probability::guessed_never ();
2101 break;
2102 case NE:
2103 msw_taken = NE;
2104 msw_taken_prob = prob;
2105 lsw_taken = NE;
2106 lsw_taken_prob = profile_probability::guessed_never ();
2107 break;
2108 case GTU: case GT:
2109 msw_taken = comparison;
2110 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2111 break;
2112 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2113 msw_skip = swap_condition (msw_taken);
2114 lsw_taken = GTU;
2115 break;
2116 case GEU: case GE:
2117 if (op2l == CONST0_RTX (SImode))
2118 msw_taken = comparison;
2119 else
2121 msw_taken = comparison == GE ? GT : GTU;
2122 msw_skip = swap_condition (msw_taken);
2123 lsw_taken = GEU;
2125 break;
2126 case LTU: case LT:
2127 msw_taken = comparison;
2128 if (op2l == CONST0_RTX (SImode))
2129 break;
2130 msw_skip = swap_condition (msw_taken);
2131 lsw_taken = LTU;
2132 break;
2133 case LEU: case LE:
2134 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2135 msw_taken = comparison;
2136 else
2138 lsw_taken = LEU;
2139 if (comparison == LE)
2140 msw_taken = LT;
2141 else if (op2h != CONST0_RTX (SImode))
2142 msw_taken = LTU;
2143 else
2145 msw_skip = swap_condition (LTU);
2146 break;
2148 msw_skip = swap_condition (msw_taken);
2150 break;
2151 default: return false;
2153 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2154 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2155 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2156 if (comparison != EQ && comparison != NE && num_branches > 1)
2158 if (!CONSTANT_P (operands[2])
2159 && prob.initialized_p ()
2160 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2161 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2163 msw_taken_prob = prob.apply_scale (1, 2);
2164 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
2165 rev_prob.to_reg_br_prob_base ()
2166 + REG_BR_PROB_BASE);
2167 lsw_taken_prob = prob;
2169 else
2171 msw_taken_prob = prob;
2172 msw_skip_prob = profile_probability::guessed_always ();
2173 /* ??? If we have a constant op2h, should we use that when
2174 calculating lsw_taken_prob? */
2175 lsw_taken_prob = prob;
2178 operands[1] = op1h;
2179 operands[2] = op2h;
2181 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2182 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2183 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2185 rtx taken_label = operands[3];
2187 /* Operands were possibly modified, but msw_skip doesn't expect this.
2188 Always use the original ones. */
2189 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2191 operands[1] = op1h;
2192 operands[2] = op2h;
2195 operands[3] = skip_label = gen_label_rtx ();
2196 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2197 operands[3] = taken_label;
2199 operands[1] = op1l;
2200 operands[2] = op2l;
2201 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2202 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2203 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2204 emit_label (skip_label);
2205 return true;
2208 /* Given an operand, return 1 if the evaluated operand plugged into an
2209 if_then_else will result in a branch_true, 0 if branch_false, or
2210 -1 if neither nor applies. The truth table goes like this:
2212 op | cmpval | code | result
2213 ---------+--------+---------+--------------------
2214 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2215 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2216 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2217 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2218 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2219 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2220 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2221 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2223 sh_eval_treg_value (rtx op)
2225 if (t_reg_operand (op, GET_MODE (op)))
2226 return 1;
2227 if (negt_reg_operand (op, GET_MODE (op)))
2228 return 0;
2230 rtx_code code = GET_CODE (op);
2231 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2232 return -1;
2234 int cmpop = code == EQ ? 1 : 0;
2235 int cmpval = INTVAL (XEXP (op, 1));
2236 if (cmpval != 0 && cmpval != 1)
2237 return -1;
2239 int t;
2240 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2241 t = 0;
2242 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2243 t = 1;
2244 else
2245 return -1;
2247 return t ^ (cmpval == cmpop);
2250 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2251 of floating-point comparisons. */
2252 static void
2253 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2255 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2256 && GET_CODE (insn) != PARALLEL)
2258 insn = gen_rtx_PARALLEL (VOIDmode,
2259 gen_rtvec (3, insn,
2260 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2261 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2263 emit_insn (insn);
2266 /* Prepare the operands for an scc instruction; make sure that the
2267 compare has been done and the result is in T_REG. */
2268 void
2269 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2271 rtx t_reg = get_t_reg_rtx ();
2272 enum rtx_code oldcode = code;
2274 /* First need a compare insn. */
2275 switch (code)
2277 case NE:
2278 /* It isn't possible to handle this case. */
2279 gcc_unreachable ();
2280 case LT:
2281 code = GT;
2282 break;
2283 case LE:
2284 code = GE;
2285 break;
2286 case LTU:
2287 code = GTU;
2288 break;
2289 case LEU:
2290 code = GEU;
2291 break;
2292 default:
2293 break;
2295 if (code != oldcode)
2296 std::swap (op0, op1);
2298 machine_mode mode = GET_MODE (op0);
2299 if (mode == VOIDmode)
2300 mode = GET_MODE (op1);
2302 op0 = force_reg (mode, op0);
2303 if ((code != EQ && code != NE
2304 && (op1 != const0_rtx
2305 || code == GTU || code == GEU || code == LTU || code == LEU))
2306 || (mode == DImode && op1 != const0_rtx)
2307 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2308 op1 = force_reg (mode, op1);
2310 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2311 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2312 mode);
2315 /* Called from the md file, set up the operands of a compare instruction. */
2316 void
2317 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2319 enum rtx_code code = GET_CODE (operands[0]);
2320 enum rtx_code branch_code;
2321 rtx op0 = operands[1];
2322 rtx op1 = operands[2];
2323 rtx insn;
2324 bool need_ccmpeq = false;
2326 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2328 op0 = force_reg (mode, op0);
2329 op1 = force_reg (mode, op1);
2331 else
2333 if (code != EQ || mode == DImode)
2335 /* Force args into regs, since we can't use constants here. */
2336 op0 = force_reg (mode, op0);
2337 if (op1 != const0_rtx || code == GTU || code == GEU)
2338 op1 = force_reg (mode, op1);
2342 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2344 if (code == LT
2345 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2346 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2348 std::swap (op0, op1);
2349 code = swap_condition (code);
2352 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2353 if (code == GE)
2355 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2356 need_ccmpeq = true;
2357 code = GT;
2360 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2361 to EQ/GT respectively. */
2362 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2365 switch (code)
2367 case EQ:
2368 case GT:
2369 case GE:
2370 case GTU:
2371 case GEU:
2372 branch_code = code;
2373 break;
2374 case NE:
2375 case LT:
2376 case LE:
2377 case LTU:
2378 case LEU:
2379 branch_code = reverse_condition (code);
2380 break;
2381 default:
2382 gcc_unreachable ();
2385 insn = gen_rtx_SET (get_t_reg_rtx (),
2386 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2388 sh_emit_set_t_insn (insn, mode);
2389 if (need_ccmpeq)
2390 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2392 if (branch_code == code)
2393 emit_jump_insn (gen_branch_true (operands[3]));
2394 else
2395 emit_jump_insn (gen_branch_false (operands[3]));
2398 void
2399 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2401 enum rtx_code code = GET_CODE (operands[1]);
2402 rtx op0 = operands[2];
2403 rtx op1 = operands[3];
2404 rtx_code_label *lab = NULL;
2405 bool invert = false;
2407 op0 = force_reg (mode, op0);
2408 if ((code != EQ && code != NE
2409 && (op1 != const0_rtx
2410 || code == GTU || code == GEU || code == LTU || code == LEU))
2411 || (mode == DImode && op1 != const0_rtx)
2412 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2413 op1 = force_reg (mode, op1);
2415 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2417 if (code == LT || code == LE)
2419 std::swap (op0, op1);
2420 code = swap_condition (code);
2422 if (code == GE)
2424 if (TARGET_IEEE)
2426 lab = gen_label_rtx ();
2427 sh_emit_scc_to_t (EQ, op0, op1);
2428 emit_jump_insn (gen_branch_true (lab));
2429 code = GT;
2431 else
2433 code = LT;
2434 invert = true;
2439 if (code == NE)
2441 code = EQ;
2442 invert = true;
2445 sh_emit_scc_to_t (code, op0, op1);
2446 if (lab)
2447 emit_label (lab);
2448 if (invert)
2449 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2450 else
2451 emit_move_insn (operands[0], get_t_reg_rtx ());
2454 /* Functions to output assembly code. */
2456 /* Return a sequence of instructions to perform DI or DF move.
2458 Since the SH cannot move a DI or DF in one instruction, we have
2459 to take care when we see overlapping source and dest registers. */
2460 const char *
2461 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2462 machine_mode mode)
2464 rtx dst = operands[0];
2465 rtx src = operands[1];
2467 if (MEM_P (dst)
2468 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2469 return "mov.l %T1,%0" "\n"
2470 " mov.l %1,%0";
2472 if (register_operand (dst, mode)
2473 && register_operand (src, mode))
2475 if (REGNO (src) == MACH_REG)
2476 return "sts mach,%S0" "\n"
2477 " sts macl,%R0";
2479 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2480 when mov.d r1,r0 do r1->r0 then r2->r1. */
2481 if (REGNO (src) + 1 == REGNO (dst))
2482 return "mov %T1,%T0" "\n"
2483 " mov %1,%0";
2484 else
2485 return "mov %1,%0" "\n"
2486 " mov %T1,%T0";
2488 else if (CONST_INT_P (src))
2490 if (INTVAL (src) < 0)
2491 output_asm_insn ("mov #-1,%S0", operands);
2492 else
2493 output_asm_insn ("mov #0,%S0", operands);
2495 return "mov %1,%R0";
2497 else if (MEM_P (src))
2499 int ptrreg = -1;
2500 int dreg = REGNO (dst);
2501 rtx inside = XEXP (src, 0);
2503 switch (GET_CODE (inside))
2505 case REG:
2506 ptrreg = REGNO (inside);
2507 break;
2509 case SUBREG:
2510 ptrreg = subreg_regno (inside);
2511 break;
2513 case PLUS:
2514 ptrreg = REGNO (XEXP (inside, 0));
2515 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2516 an offsettable address. Unfortunately, offsettable addresses use
2517 QImode to check the offset, and a QImode offsettable address
2518 requires r0 for the other operand, which is not currently
2519 supported, so we can't use the 'o' constraint.
2520 Thus we must check for and handle r0+REG addresses here.
2521 We punt for now, since this is likely very rare. */
2522 gcc_assert (!REG_P (XEXP (inside, 1)));
2523 break;
2525 case LABEL_REF:
2526 return "mov.l %1,%0" "\n"
2527 " mov.l %1+4,%T0";
2528 case POST_INC:
2529 return "mov.l %1,%0" "\n"
2530 " mov.l %1,%T0";
2531 default:
2532 gcc_unreachable ();
2535 /* Work out the safe way to copy. Copy into the second half first. */
2536 if (dreg == ptrreg)
2537 return "mov.l %T1,%T0" "\n"
2538 " mov.l %1,%0";
2541 return "mov.l %1,%0" "\n"
2542 " mov.l %T1,%T0";
2545 /* Print an instruction which would have gone into a delay slot after
2546 another instruction, but couldn't because the other instruction expanded
2547 into a sequence where putting the slot insn at the end wouldn't work. */
2548 static void
2549 print_slot (rtx_sequence *seq)
2551 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2553 seq->insn (1)->set_deleted ();
2556 const char *
2557 output_far_jump (rtx_insn *insn, rtx op)
2559 struct { rtx lab, reg, op; } this_jmp;
2560 rtx_code_label *braf_base_lab = NULL;
2561 const char *jump;
2562 int far;
2563 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2564 rtx_insn *prev;
2566 this_jmp.lab = gen_label_rtx ();
2568 if (TARGET_SH2
2569 && offset >= -32764
2570 && offset - get_attr_length (insn) <= 32766
2571 && ! CROSSING_JUMP_P (insn))
2573 far = 0;
2574 jump = "mov.w %O0,%1" "\n"
2575 " braf %1";
2577 else
2579 far = 1;
2580 if (flag_pic)
2582 if (TARGET_SH2)
2583 jump = "mov.l %O0,%1" "\n"
2584 " braf %1";
2585 else
2586 jump = "mov.l r0,@-r15" "\n"
2587 " mova %O0,r0" "\n"
2588 " mov.l @r0,%1" "\n"
2589 " add r0,%1" "\n"
2590 " mov.l @r15+,r0" "\n"
2591 " jmp @%1";
2593 else
2594 jump = "mov.l %O0,%1" "\n"
2595 " jmp @%1";
2597 /* If we have a scratch register available, use it. */
2598 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2599 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2601 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2602 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2603 jump = "mov.l r1,@-r15" "\n"
2604 " mova %O0,r0" "\n"
2605 " mov.l @r0,r1" "\n"
2606 " add r1,r0" "\n"
2607 " mov.l @r15+,r1" "\n"
2608 " jmp @%1";
2609 output_asm_insn (jump, &this_jmp.lab);
2610 if (dbr_sequence_length ())
2611 print_slot (final_sequence);
2612 else
2613 output_asm_insn ("nop", 0);
2615 else
2617 /* Output the delay slot insn first if any. */
2618 if (dbr_sequence_length ())
2619 print_slot (final_sequence);
2621 this_jmp.reg = gen_rtx_REG (SImode, 13);
2622 output_asm_insn ("mov.l r13,@-r15", 0);
2623 output_asm_insn (jump, &this_jmp.lab);
2624 output_asm_insn ("mov.l @r15+,r13", 0);
2626 if (far && flag_pic && TARGET_SH2)
2628 braf_base_lab = gen_label_rtx ();
2629 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2630 CODE_LABEL_NUMBER (braf_base_lab));
2632 if (far)
2633 output_asm_insn (".align 2", 0);
2634 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2635 this_jmp.op = op;
2636 if (far && flag_pic)
2638 if (TARGET_SH2)
2639 this_jmp.lab = braf_base_lab;
2640 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2642 else
2643 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2644 return "";
2647 /* Local label counter, used for constants in the pool and inside
2648 pattern branches. */
2649 static int lf = 100;
2651 /* Output code for ordinary branches. */
2652 const char *
2653 output_branch (int logic, rtx_insn *insn, rtx *operands)
2655 switch (get_attr_length (insn))
2657 case 6:
2658 /* This can happen if filling the delay slot has caused a forward
2659 branch to exceed its range (we could reverse it, but only
2660 when we know we won't overextend other branches; this should
2661 best be handled by relaxation).
2662 It can also happen when other condbranches hoist delay slot insn
2663 from their destination, thus leading to code size increase.
2664 But the branch will still be in the range -4092..+4098 bytes. */
2665 if (! TARGET_RELAX)
2667 int label = lf++;
2668 /* The call to print_slot will clobber the operands. */
2669 rtx op0 = operands[0];
2671 /* If the instruction in the delay slot is annulled (true), then
2672 there is no delay slot where we can put it now. The only safe
2673 place for it is after the label. final will do that by default. */
2675 if (final_sequence
2676 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2677 && get_attr_length (final_sequence->insn (1)))
2679 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2680 ASSEMBLER_DIALECT ? "/" : ".", label);
2681 print_slot (final_sequence);
2683 else
2684 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2686 output_asm_insn ("bra\t%l0", &op0);
2687 fprintf (asm_out_file, "\tnop\n");
2688 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2690 return "";
2692 /* FALLTHRU */
2693 /* When relaxing, handle this like a short branch. The linker
2694 will fix it up if it still doesn't fit after relaxation. */
2695 case 2:
2696 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2698 /* These are for SH2e, in which we have to account for the
2699 extra nop because of the hardware bug in annulled branches. */
2700 case 8:
2701 if (! TARGET_RELAX)
2703 int label = lf++;
2705 gcc_assert (!final_sequence
2706 || !(INSN_ANNULLED_BRANCH_P
2707 (XVECEXP (final_sequence, 0, 0))));
2708 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2709 logic ? "f" : "t",
2710 ASSEMBLER_DIALECT ? "/" : ".", label);
2711 fprintf (asm_out_file, "\tnop\n");
2712 output_asm_insn ("bra\t%l0", operands);
2713 fprintf (asm_out_file, "\tnop\n");
2714 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2716 return "";
2718 /* FALLTHRU */
2719 case 4:
2721 char buffer[10];
2723 sprintf (buffer, "b%s%ss\t%%l0",
2724 logic ? "t" : "f",
2725 ASSEMBLER_DIALECT ? "/" : ".");
2726 output_asm_insn (buffer, &operands[0]);
2727 return "nop";
2730 default:
2731 /* There should be no longer branches now - that would
2732 indicate that something has destroyed the branches set
2733 up in machine_dependent_reorg. */
2734 gcc_unreachable ();
2738 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2739 fill in operands 9 as a label to the successor insn.
2740 We try to use jump threading where possible.
2741 IF CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2742 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2743 follow jmp and bt, if the address is in range. */
2744 const char *
2745 output_branchy_insn (enum rtx_code code, const char *templ,
2746 rtx_insn *insn, rtx *operands)
2748 rtx_insn *next_insn = NEXT_INSN (insn);
2750 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2752 rtx src = SET_SRC (PATTERN (next_insn));
2753 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2755 /* Following branch not taken */
2756 rtx_code_label *lab = gen_label_rtx ();
2757 emit_label_after (lab, next_insn);
2758 INSN_ADDRESSES_NEW (lab,
2759 INSN_ADDRESSES (INSN_UID (next_insn))
2760 + get_attr_length (next_insn));
2761 operands[9] = lab;
2762 return templ;
2764 else
2766 int offset = (branch_dest (next_insn)
2767 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2768 if (offset >= -252 && offset <= 258)
2770 if (GET_CODE (src) == IF_THEN_ELSE)
2771 /* branch_true */
2772 src = XEXP (src, 1);
2773 operands[9] = src;
2774 return templ;
2778 rtx_code_label *lab = gen_label_rtx ();
2779 emit_label_after (lab, insn);
2780 INSN_ADDRESSES_NEW (lab,
2781 INSN_ADDRESSES (INSN_UID (insn))
2782 + get_attr_length (insn));
2783 operands[9] = lab;
2784 return templ;
2787 const char *
2788 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2790 return output_branchy_insn (NE, "bt %l9" "\n"
2791 " fcmp/eq %1,%0",
2792 insn, operands);
2795 /* Output the start of the assembler file. */
2796 static void
2797 sh_file_start (void)
2799 default_file_start ();
2801 if (TARGET_ELF)
2802 /* We need to show the text section with the proper
2803 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2804 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2805 will complain. We can teach GAS specifically about the
2806 default attributes for our choice of text section, but
2807 then we would have to change GAS again if/when we change
2808 the text section name. */
2809 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2810 else
2811 /* Switch to the data section so that the coffsem symbol
2812 isn't in the text section. */
2813 switch_to_section (data_section);
2815 if (TARGET_LITTLE_ENDIAN)
2816 fputs ("\t.little\n", asm_out_file);
2819 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2820 need to be output as pointers to function descriptors for
2821 FDPIC. */
2823 static bool
2824 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2826 if (TARGET_FDPIC && size == UNITS_PER_WORD
2827 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2829 fputs ("\t.long\t", asm_out_file);
2830 output_addr_const (asm_out_file, value);
2831 fputs ("@FUNCDESC\n", asm_out_file);
2832 return true;
2834 return default_assemble_integer (value, size, aligned_p);
2837 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2838 static bool
2839 unspec_caller_rtx_p (rtx pat)
2841 rtx base, offset;
2842 split_const (pat, &base, &offset);
2844 if (GET_CODE (base) == UNSPEC)
2846 if (XINT (base, 1) == UNSPEC_CALLER)
2847 return true;
2848 for (int i = 0; i < XVECLEN (base, 0); i++)
2849 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2850 return true;
2852 return false;
2855 /* Indicate that INSN cannot be duplicated. This is true for insn
2856 that generates a unique label. */
2857 static bool
2858 sh_cannot_copy_insn_p (rtx_insn *insn)
2860 if (!reload_completed || !flag_pic)
2861 return false;
2863 if (!NONJUMP_INSN_P (insn))
2864 return false;
2865 if (asm_noperands (insn) >= 0)
2866 return false;
2868 rtx pat = PATTERN (insn);
2870 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2871 return false;
2873 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2875 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2876 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2877 return true;
2880 if (GET_CODE (pat) != SET)
2881 return false;
2882 pat = SET_SRC (pat);
2884 if (unspec_caller_rtx_p (pat))
2885 return true;
2887 return false;
2890 /* Number of instructions used to make an arithmetic right shift by N. */
2891 static const char ashiftrt_insns[] =
2892 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2894 /* Description of a logical left or right shift, when expanded to a sequence
2895 of 1/2/8/16 shifts.
2896 Notice that one bit right shifts clobber the T bit. One bit left shifts
2897 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2898 enum
2900 ASHL_CLOBBERS_T = 1 << 0,
2901 LSHR_CLOBBERS_T = 1 << 1
2904 struct ashl_lshr_sequence
2906 char insn_count;
2907 signed char amount[6];
2908 char clobbers_t;
2911 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2913 { 0, { 0 }, 0 }, // 0
2914 { 1, { 1 }, LSHR_CLOBBERS_T },
2915 { 1, { 2 }, 0 },
2916 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2917 { 2, { 2, 2 }, 0 }, // 4
2918 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2919 { 3, { 2, 2, 2 }, 0 },
2920 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2921 { 1, { 8 }, 0 }, // 8
2922 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2923 { 2, { 8, 2 }, 0 },
2924 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2925 { 3, { 8, 2, 2 }, 0 }, // 12
2926 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2927 { 3, { 8, -2, 8 }, 0 },
2928 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2929 { 1, { 16 }, 0 }, // 16
2930 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2931 { 2, { 16, 2 }, 0 },
2932 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2933 { 3, { 16, 2, 2 }, 0 }, // 20
2934 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2935 { 3, { 16, -2, 8 }, 0 },
2936 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2937 { 2, { 16, 8 }, 0 }, // 24
2938 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2939 { 3, { 16, 8, 2 }, 0 },
2940 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2941 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2942 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2943 { 3, { 16, -2, 16 }, 0 },
2945 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2946 For a left shift by 31 a 2 insn and-rotl sequences can be used.
2947 However, the shift-and combiner code needs this entry here to be in
2948 terms of real shift insns. */
2949 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2952 /* Individual shift amounts for shift amounts < 16, up to three highmost
2953 bits might be clobbered. This is typically used when combined with some
2954 kind of sign or zero extension. */
2955 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2957 { 0, { 0 }, 0 }, // 0
2958 { 1, { 1 }, LSHR_CLOBBERS_T },
2959 { 1, { 2 }, 0 },
2960 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2961 { 2, { 2, 2 }, 0 }, // 4
2962 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2963 { 2, { 8, -2 }, 0 },
2964 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2965 { 1, { 8 }, 0 }, // 8
2966 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2967 { 2, { 8, 2 }, 0 },
2968 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2969 { 3, { 8, 2, 2 }, 0 }, // 12
2970 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2971 { 2, { 16, -2 }, 0 },
2972 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2973 { 1, { 16 }, 0 }, // 16
2974 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2975 { 2, { 16, 2 }, 0 },
2976 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2977 { 3, { 16, 2, 2 }, 0 }, // 20
2978 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2979 { 3, { 16, -2, 8 }, 0 },
2980 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2981 { 2, { 16, 8 }, 0 }, // 24
2982 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2983 { 3, { 16, 8, 2 }, 0 },
2984 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2985 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2986 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2987 { 3, { 16, -2, 16 }, 0 },
2988 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2991 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
2992 will clobber the T bit. */
2993 bool
2994 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
2996 gcc_assert (CONST_INT_P (shift_amount));
2998 const int shift_amount_i = INTVAL (shift_amount) & 31;
3000 /* Special case for shift count of 31: use and-rotl sequence. */
3001 if (shift_amount_i == 31)
3002 return true;
3004 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3005 & ASHL_CLOBBERS_T) != 0;
3008 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3009 instructions will clobber the T bit. */
3010 bool
3011 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3013 gcc_assert (CONST_INT_P (shift_amount));
3015 /* For right shifts the constant might be negative. */
3016 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3018 /* Special case for shift count of 31: use shll-movt sequence. */
3019 if (shift_amount_i == 31)
3020 return true;
3022 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3023 & LSHR_CLOBBERS_T) != 0;
3026 /* Return true if it is potentially beneficial to use a dynamic shift
3027 instruction (shad / shar) instead of a combination of 1/2/8/16
3028 shift instructions for the specified shift count.
3029 If dynamic shifts are not available, always return false. */
3030 bool
3031 sh_dynamicalize_shift_p (rtx count)
3033 gcc_assert (CONST_INT_P (count));
3035 /* For right shifts the constant might be negative. */
3036 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3037 int insn_count;
3039 /* For left and right shifts, there are shorter 2 insn sequences for
3040 shift amounts of 31. */
3041 if (shift_amount_i == 31)
3042 insn_count = 2;
3043 else
3044 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3046 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3049 /* Assuming we have a value that has been sign-extended by at least one bit,
3050 can we use the ext_shift_amounts with the last shift turned to an
3051 arithmetic shift to shift it by N without data loss, and quicker than by
3052 other means? */
3053 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
3055 /* Return the cost of a shift. */
3056 static inline int
3057 shiftcosts (rtx x)
3059 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3061 if (GET_MODE (x) == DImode
3062 && CONST_INT_P (XEXP (x, 1))
3063 && INTVAL (XEXP (x, 1)) == 1)
3064 return 2;
3066 /* Everything else is invalid, because there is no pattern for it. */
3067 return -1;
3069 /* If shift by a non constant, then this will be expensive. */
3070 if (!CONST_INT_P (XEXP (x, 1)))
3071 return SH_DYNAMIC_SHIFT_COST;
3073 /* Otherwise, return the true cost in instructions. Cope with out of range
3074 shift counts more or less arbitrarily. */
3075 int value = INTVAL (XEXP (x, 1)) & 31;
3077 if (GET_CODE (x) == ASHIFTRT)
3079 int cost = ashiftrt_insns[value];
3080 /* If dynamic shifts are available and profitable in this case, then we
3081 put the constant in a reg and use shad. */
3082 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3083 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3084 return cost;
3086 else
3087 return ashl_lshr_seq[value].insn_count;
3090 /* Return the cost of an AND/XOR/IOR operation. */
3091 static inline int
3092 and_xor_ior_costs (rtx x, int code)
3094 /* On SH1-4 we have only max. SImode operations.
3095 Double the cost for modes > SImode. */
3096 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3098 /* A logical operation with two registers is a single cycle
3099 instruction. */
3100 if (!CONST_INT_P (XEXP (x, 1)))
3101 return 1 * cost_scale;
3103 int i = INTVAL (XEXP (x, 1));
3105 /* These constants are single cycle extu.[bw] instructions. */
3106 if ((i == 0xff || i == 0xffff) && code == AND)
3107 return 1 * cost_scale;
3108 /* Constants that can be used in an instruction as an immediate are
3109 a single cycle, but this requires r0, so make it a little more
3110 expensive. */
3111 if (CONST_OK_FOR_K08 (i))
3112 return 2 * cost_scale;
3113 /* Constants that can be loaded with a mov immediate need one more cycle.
3114 This case is probably unnecessary. */
3115 if (CONST_OK_FOR_I08 (i))
3116 return 2 * cost_scale;
3117 /* Any other constant requires an additional 2 cycle pc-relative load.
3118 This case is probably unnecessary. */
3119 return 3 * cost_scale;
3122 /* Return the cost of an addition or a subtraction. */
3123 static inline int
3124 addsubcosts (rtx x)
3126 if (GET_MODE (x) == SImode)
3128 /* The addc or subc patterns will eventually become one or two
3129 instructions. Below are some costs for some of the patterns
3130 which combine would reject because the costs of the individual
3131 insns in the patterns are lower.
3133 FIXME: It would be much easier if we had something like insn cost
3134 attributes and the cost calculation machinery used those attributes
3135 in the first place. This would eliminate redundant recog-like C
3136 code to calculate costs of complex patterns. */
3137 rtx op0 = XEXP (x, 0);
3138 rtx op1 = XEXP (x, 1);
3140 if (GET_CODE (x) == PLUS)
3142 if (GET_CODE (op0) == AND
3143 && XEXP (op0, 1) == const1_rtx
3144 && (GET_CODE (op1) == PLUS
3145 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3146 return 1;
3148 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3149 && GET_CODE (op1) == LSHIFTRT
3150 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3151 return 1;
3153 /* Let's assume that adding the result of an insns that stores into
3154 the T bit is cheap. */
3155 if (treg_set_expr (op1, SImode))
3156 return 1;
3157 if (treg_set_expr (op0, SImode))
3158 return 1;
3161 /* On SH1-4 we have only max. SImode operations.
3162 Double the cost for modes > SImode. */
3163 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3165 /* Adding a register is a single cycle insn. */
3166 if (REG_P (XEXP (x, 1))
3167 || GET_CODE (XEXP (x, 1)) == SUBREG)
3168 return 1 * cost_scale;
3170 /* Likewise for small constants. */
3171 if (CONST_INT_P (XEXP (x, 1))
3172 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3173 return 1 * cost_scale;
3175 /* Any other constant requires a 2 cycle pc-relative load plus an
3176 addition. */
3177 return 3 * cost_scale;
3180 /* Return the cost of a multiply. */
3181 static inline int
3182 multcosts (rtx x ATTRIBUTE_UNUSED)
3184 if (sh_multcost >= 0)
3185 return sh_multcost;
3187 if (TARGET_SH2)
3189 /* We have a mul insn, so we can never take more than the mul and the
3190 read of the mac reg, but count more because of the latency and extra
3191 reg usage. */
3192 if (optimize_size)
3193 return 2;
3194 return 3;
3197 /* If we're aiming at small code, then just count the number of
3198 insns in a multiply call sequence. */
3199 if (optimize_size)
3200 return 5;
3202 /* Otherwise count all the insns in the routine we'd be calling too. */
3203 return 20;
3206 /* Compute a (partial) cost for rtx X. Return true if the complete
3207 cost has been computed, and false if subexpressions should be
3208 scanned. In either case, *TOTAL contains the cost result. */
3209 static bool
3210 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3211 int opno ATTRIBUTE_UNUSED,
3212 int *total, bool speed ATTRIBUTE_UNUSED)
3214 int code = GET_CODE (x);
3216 switch (code)
3218 /* The lower-subreg pass decides whether to split multi-word regs
3219 into individual regs by looking at the cost for a SET of certain
3220 modes with the following patterns:
3221 (set (reg) (reg))
3222 (set (reg) (const_int 0))
3223 On machines that support vector-move operations a multi-word move
3224 is the same cost as individual reg move. On SH there is no
3225 vector-move, so we have to provide the correct cost in the number
3226 of move insns to load/store the reg of the mode in question. */
3227 case SET:
3228 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3230 *total = COSTS_N_INSNS (1);
3231 return true;
3234 if (register_operand (SET_DEST (x), VOIDmode)
3235 && (register_operand (SET_SRC (x), VOIDmode)
3236 || satisfies_constraint_Z (SET_SRC (x))))
3238 const machine_mode mode = GET_MODE (SET_DEST (x));
3239 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3240 / mov_insn_size (mode, TARGET_SH2A));
3241 return true;
3243 return false;
3245 /* The cost of a mem access is mainly the cost of the address mode. */
3246 case MEM:
3247 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3248 true);
3249 return true;
3251 case IF_THEN_ELSE:
3252 /* This case is required for the if_then_else negc pattern. */
3253 if (treg_set_expr (XEXP (x, 0), SImode))
3255 *total = COSTS_N_INSNS (1);
3256 return true;
3258 else
3259 return false;
3261 /* Zero extracts of single bits are usually combine patterns for the
3262 tst insns. */
3263 case ZERO_EXTRACT:
3264 if (GET_CODE (XEXP (x, 0)) == XOR
3265 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3266 && XEXP (x, 1) == const1_rtx
3267 && CONST_INT_P (XEXP (x, 2))
3268 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3269 /* Check that the xor constaint overlaps with the extracted bit. */
3270 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3272 *total = 1; //COSTS_N_INSNS (1);
3273 return true;
3276 /* div0s variant. */
3277 if (GET_CODE (XEXP (x, 0)) == XOR
3278 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3279 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3281 *total = 1;
3282 return true;
3284 return false;
3286 /* The cost of a sign or zero extend depends on whether the source is a
3287 reg or a mem. In case of a mem take the address into account. */
3288 case SIGN_EXTEND:
3289 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3291 *total = COSTS_N_INSNS (1);
3292 return true;
3294 if (MEM_P (XEXP (x, 0)))
3296 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3297 GET_MODE (XEXP (x, 0)),
3298 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3299 return true;
3301 return false;
3303 case ZERO_EXTEND:
3304 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3306 *total = COSTS_N_INSNS (1);
3307 return true;
3309 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3310 && (GET_MODE (XEXP (x, 0)) == QImode
3311 || GET_MODE (XEXP (x, 0)) == HImode))
3313 /* Handle SH2A's movu.b and movu.w insn. */
3314 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3315 GET_MODE (XEXP (x, 0)),
3316 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3317 return true;
3319 return false;
3321 /* mems for SFmode and DFmode can be inside a parallel due to
3322 the way the fpscr is handled. */
3323 case PARALLEL:
3324 for (int i = 0; i < XVECLEN (x, 0); i++)
3326 rtx xx = XVECEXP (x, 0, i);
3327 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3329 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3330 GET_MODE (XEXP (xx, 0)),
3331 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3332 return true;
3334 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3336 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3337 GET_MODE (XEXP (xx, 1)),
3338 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3339 return true;
3343 if (sh_1el_vec (x, VOIDmode))
3344 *total = outer_code != SET;
3345 else if (sh_rep_vec (x, VOIDmode))
3346 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3347 + (outer_code != SET));
3348 else
3349 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3350 return true;
3352 case CONST_INT:
3353 if (CONST_OK_FOR_I08 (INTVAL (x)))
3354 *total = 0;
3355 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3356 && CONST_OK_FOR_K08 (INTVAL (x)))
3357 *total = 1;
3358 /* prepare_cmp_insn will force costly constants int registers before
3359 the cbranch[sd]i4 patterns can see them, so preserve potentially
3360 interesting ones not covered by I08 above. */
3361 else if (outer_code == COMPARE
3362 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3363 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3364 || INTVAL (x) == 0x7fffffff
3365 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3366 *total = 1;
3367 else
3368 *total = 8;
3369 return true;
3371 case EQ:
3372 /* An and with a constant compared against zero is
3373 most likely going to be a TST #imm, R0 instruction. */
3374 if (XEXP (x, 1) == const0_rtx
3375 && ((GET_CODE (XEXP (x, 0)) == AND
3376 || (SUBREG_P (XEXP (x, 0))
3377 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3378 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3380 *total = 1;
3381 return true;
3384 else if (XEXP (x, 1) == const0_rtx
3385 && GET_CODE (XEXP (x, 0)) == AND
3386 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3387 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3388 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3389 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3391 *total = 1;
3392 return true;
3394 else
3395 return false;
3397 case SMIN:
3398 case SMAX:
3399 /* This is most likely a clips.b or clips.w insn that is being made up
3400 by combine. */
3401 if (TARGET_SH2A
3402 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3403 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3404 && REG_P (XEXP (XEXP (x, 0), 0))
3405 && CONST_INT_P (XEXP (x, 1)))
3407 *total = COSTS_N_INSNS (1);
3408 return true;
3410 else
3411 return false;
3413 case CONST:
3414 case LABEL_REF:
3415 case SYMBOL_REF:
3416 *total = 5;
3417 return true;
3419 case CONST_DOUBLE:
3420 /* prepare_cmp_insn will force costly constants int registers before
3421 the cbranchdi4 pattern can see them, so preserve potentially
3422 interesting ones. */
3423 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3424 *total = 1;
3425 else
3426 *total = 10;
3427 return true;
3429 case CONST_VECTOR:
3430 /* FIXME: This looks broken. Only the last statement has any effect.
3431 Probably this could be folded with the PARALLEL case? */
3432 if (x == CONST0_RTX (GET_MODE (x)))
3433 *total = 0;
3434 else if (sh_1el_vec (x, VOIDmode))
3435 *total = outer_code != SET;
3436 if (sh_rep_vec (x, VOIDmode))
3437 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3438 + (outer_code != SET));
3439 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3440 return true;
3442 case PLUS:
3443 case MINUS:
3444 *total = COSTS_N_INSNS (addsubcosts (x));
3445 return true;
3447 case AND:
3448 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3449 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3451 *total = COSTS_N_INSNS (1);
3452 return true;
3454 /* Fall through. */
3456 case XOR:
3457 case IOR:
3458 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3459 return true;
3461 case MULT:
3462 *total = COSTS_N_INSNS (multcosts (x));
3463 return true;
3465 case LT:
3466 case GE:
3467 /* div0s sign comparison. */
3468 if (GET_CODE (XEXP (x, 0)) == XOR
3469 && REG_P ((XEXP (XEXP (x, 0), 0)))
3470 && REG_P ((XEXP (XEXP (x, 0), 1)))
3471 && satisfies_constraint_Z (XEXP (x, 1)))
3473 *total = COSTS_N_INSNS (1);
3474 return true;
3476 else
3477 return false;
3479 case LSHIFTRT:
3480 /* div0s sign comparison. */
3481 if (GET_CODE (XEXP (x, 0)) == XOR
3482 && REG_P ((XEXP (XEXP (x, 0), 0)))
3483 && REG_P ((XEXP (XEXP (x, 0), 1)))
3484 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3486 *total = COSTS_N_INSNS (1);
3487 return true;
3489 /* FALLTHRU */
3490 case ASHIFT:
3491 case ASHIFTRT:
3493 int cost = shiftcosts (x);
3494 if (cost < 0)
3495 return false;
3496 *total = COSTS_N_INSNS (cost);
3497 return true;
3500 case DIV:
3501 case UDIV:
3502 case MOD:
3503 case UMOD:
3504 *total = COSTS_N_INSNS (20);
3505 return true;
3507 case FLOAT:
3508 case FIX:
3509 *total = 100;
3510 return true;
3512 default:
3513 return false;
3517 /* Determine the size of the fundamental move insn that will be used
3518 for the specified mode. */
3519 static inline int
3520 mov_insn_size (machine_mode mode, bool consider_sh2a)
3522 const int mode_sz = GET_MODE_SIZE (mode);
3524 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3525 || (TARGET_FMOVD && mode == DFmode))
3526 return mode_sz;
3527 else
3529 /* The max. available mode for actual move insns is SImode.
3530 Larger accesses will be split into multiple loads/stores. */
3531 const int max_mov_sz = GET_MODE_SIZE (SImode);
3532 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3536 /* Determine the maximum possible displacement for a move insn for the
3537 specified mode. */
3539 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3541 /* The 4 byte displacement move insns are the same as the 2 byte
3542 versions but take a 12 bit displacement. All we need to do is to
3543 scale the max. displacement value accordingly. */
3544 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3546 /* SH2A supports FPU move insns with 12 bit displacements.
3547 Other variants to do not support any kind of displacements for
3548 FPU move insns. */
3549 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3550 return 0;
3551 else
3553 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3554 const int mode_sz = GET_MODE_SIZE (mode);
3555 int r = 15 * mov_insn_sz * disp_scale;
3557 /* If the mov insn will be split into multiple loads/stores, the
3558 maximum possible displacement is a bit smaller. */
3559 if (mode_sz > mov_insn_sz)
3560 r -= mode_sz - mov_insn_sz;
3561 return r;
3565 /* Determine the alignment mask for a move insn of the
3566 specified mode. */
3567 static inline int
3568 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3570 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3571 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3574 /* Return the displacement value of a displacement address. */
3575 HOST_WIDE_INT
3576 sh_disp_addr_displacement (rtx x)
3578 gcc_assert (satisfies_constraint_Sdd (x));
3579 return INTVAL (XEXP (XEXP (x, 0), 1));
3582 /* Compute the cost of an address. */
3583 static int
3584 sh_address_cost (rtx x, machine_mode mode,
3585 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3587 /* 'GBR + 0'. Account one more because of R0 restriction. */
3588 if (REG_P (x) && REGNO (x) == GBR_REG)
3589 return 2;
3591 /* Simple reg, post-inc, pre-dec addressing. */
3592 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3593 return 1;
3595 /* 'reg + disp' addressing. */
3596 if (GET_CODE (x) == PLUS
3597 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3599 /* 'GBR + disp'. Account one more because of R0 restriction. */
3600 if (REGNO (XEXP (x, 0)) == GBR_REG
3601 && gbr_displacement (XEXP (x, 1), mode))
3602 return 2;
3604 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3606 if (offset == 0)
3607 return 1;
3609 /* The displacement would fit into a 2 byte move insn.
3610 HImode and QImode loads/stores with displacement put pressure on
3611 R0 which will most likely require another reg copy. Thus account
3612 a higher cost for that. */
3613 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3614 return (mode == HImode || mode == QImode) ? 2 : 1;
3616 /* The displacement would fit into a 4 byte move insn (SH2A). */
3617 if (TARGET_SH2A
3618 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3619 return 2;
3621 /* The displacement is probably out of range and will require extra
3622 calculations. */
3623 return 3;
3626 /* 'reg + reg' addressing. Account a slightly higher cost because of
3627 increased pressure on R0. */
3628 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3629 return 3;
3631 /* Not sure what it is - probably expensive. */
3632 return 10;
3635 /* Code to expand a shift. */
3636 static void
3637 gen_ashift (int type, int n, rtx reg)
3639 rtx n_rtx;
3641 /* Negative values here come from the shift_amounts array. */
3642 if (n < 0)
3644 if (type == ASHIFT)
3645 type = LSHIFTRT;
3646 else
3647 type = ASHIFT;
3648 n = -n;
3651 n_rtx = GEN_INT (n);
3652 gcc_assert (satisfies_constraint_P27 (n_rtx));
3654 switch (type)
3656 case ASHIFTRT:
3657 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3658 break;
3659 case LSHIFTRT:
3660 if (n == 1)
3661 emit_insn (gen_shlr (reg, reg));
3662 else
3663 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3664 break;
3665 case ASHIFT:
3666 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3667 break;
3668 default:
3669 gcc_unreachable ();
3673 /* Code to expand a HImode shift. */
3674 static void
3675 gen_ashift_hi (int type, int n, rtx reg)
3677 /* Negative values here come from the shift_amounts array. */
3678 if (n < 0)
3680 if (type == ASHIFT)
3681 type = LSHIFTRT;
3682 else
3683 type = ASHIFT;
3684 n = -n;
3687 switch (type)
3689 case ASHIFTRT:
3690 case LSHIFTRT:
3691 /* We don't have HImode right shift operations because using the
3692 ordinary 32 bit shift instructions for that doesn't generate proper
3693 zero/sign extension.
3694 gen_ashift_hi is only called in contexts where we know that the
3695 sign extension works out correctly. */
3697 int offset = 0;
3698 if (GET_CODE (reg) == SUBREG)
3700 offset = SUBREG_BYTE (reg);
3701 reg = SUBREG_REG (reg);
3703 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3704 break;
3706 case ASHIFT:
3707 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3708 break;
3712 /* Output RTL to split a constant shift into its component SH constant
3713 shift instructions. */
3714 void
3715 gen_shifty_op (int code, rtx *operands)
3717 int value = INTVAL (operands[2]);
3718 int max, i;
3720 /* Truncate the shift count in case it is out of bounds. */
3721 value = value & 31;
3723 if (value == 31)
3725 if (code == LSHIFTRT)
3727 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3728 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3729 return;
3731 else if (code == ASHIFT)
3733 /* There is a two instruction sequence for 31 bit left shifts,
3734 but it requires r0. */
3735 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3737 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3738 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3739 return;
3743 else if (value == 0)
3745 /* This can happen even when optimizing, if there were subregs before
3746 reload. Don't output a nop here, as this is never optimized away;
3747 use a no-op move instead. */
3748 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3749 return;
3752 max = ashl_lshr_seq[value].insn_count;
3753 for (i = 0; i < max; i++)
3754 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3757 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3758 don't matter. */
3759 void
3760 gen_shifty_hi_op (int code, rtx *operands)
3762 int value = INTVAL (operands[2]);
3763 int max, i;
3764 void (*gen_fun) (int, int, rtx);
3766 /* This operation is used by and_shl for SImode values with a few
3767 high bits known to be cleared. */
3768 value &= 31;
3769 if (value == 0)
3771 emit_insn (gen_nop ());
3772 return;
3775 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3776 if (code == ASHIFT)
3778 max = ext_ashl_lshr_seq[value].insn_count;
3779 for (i = 0; i < max; i++)
3780 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3782 else
3783 /* When shifting right, emit the shifts in reverse order, so that
3784 solitary negative values come first. */
3785 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3786 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3789 /* Output RTL for an arithmetic right shift.
3790 ??? Rewrite to use super-optimizer sequences. */
3791 bool
3792 expand_ashiftrt (rtx *operands)
3794 rtx wrk;
3795 char func[18];
3796 int value;
3798 if (TARGET_DYNSHIFT)
3800 if (!CONST_INT_P (operands[2]))
3802 rtx count = copy_to_mode_reg (SImode, operands[2]);
3803 emit_insn (gen_negsi2 (count, count));
3804 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3805 return true;
3807 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3808 > 1 + SH_DYNAMIC_SHIFT_COST)
3810 rtx count
3811 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3812 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3813 return true;
3816 if (!CONST_INT_P (operands[2]))
3817 return false;
3819 value = INTVAL (operands[2]) & 31;
3821 if (value == 31)
3823 /* If we are called from abs expansion, arrange things so that we
3824 we can use a single MT instruction that doesn't clobber the source,
3825 if LICM can hoist out the load of the constant zero. */
3826 if (currently_expanding_to_rtl)
3828 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3829 operands[1]));
3830 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3831 return true;
3833 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3834 return true;
3836 else if (value >= 16 && value <= 19)
3838 wrk = gen_reg_rtx (SImode);
3839 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3840 value -= 16;
3841 while (value--)
3842 gen_ashift (ASHIFTRT, 1, wrk);
3843 emit_move_insn (operands[0], wrk);
3844 return true;
3846 /* Expand a short sequence inline, longer call a magic routine. */
3847 else if (value <= 5)
3849 wrk = gen_reg_rtx (SImode);
3850 emit_move_insn (wrk, operands[1]);
3851 while (value--)
3852 gen_ashift (ASHIFTRT, 1, wrk);
3853 emit_move_insn (operands[0], wrk);
3854 return true;
3857 wrk = gen_reg_rtx (Pmode);
3859 /* Load the value into an arg reg and call a helper. */
3860 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3861 sprintf (func, "__ashiftrt_r4_%d", value);
3862 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3863 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3864 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3865 return true;
3868 /* Try to find a good way to implement the combiner pattern
3869 [(set (match_operand:SI 0 "register_operand" "r")
3870 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3871 (match_operand:SI 2 "const_int_operand" "n"))
3872 (match_operand:SI 3 "const_int_operand" "n"))) .
3873 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3874 return 0 for simple right / left or left/right shift combination.
3875 return 1 for a combination of shifts with zero_extend.
3876 return 2 for a combination of shifts with an AND that needs r0.
3877 return 3 for a combination of shifts with an AND that needs an extra
3878 scratch register, when the three highmost bits of the AND mask are clear.
3879 return 4 for a combination of shifts with an AND that needs an extra
3880 scratch register, when any of the three highmost bits of the AND mask
3881 is set.
3882 If ATTRP is set, store an initial right shift width in ATTRP[0],
3883 and the instruction length in ATTRP[1] . These values are not valid
3884 when returning 0.
3885 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3886 shift_amounts for the last shift value that is to be used before the
3887 sign extend. */
3889 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3891 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3892 int left = INTVAL (left_rtx), right;
3893 int best = 0;
3894 int cost, best_cost = 10000;
3895 int best_right = 0, best_len = 0;
3896 int i;
3897 int can_ext;
3899 if (left < 0 || left > 31)
3900 return 0;
3901 if (CONST_INT_P (mask_rtx))
3902 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3903 else
3904 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3905 /* Can this be expressed as a right shift / left shift pair? */
3906 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3907 right = exact_log2 (lsb);
3908 mask2 = ~(mask + lsb - 1);
3909 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3910 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3911 if (! mask2)
3912 best_cost = ashl_lshr_seq[right].insn_count
3913 + ashl_lshr_seq[right + left].insn_count;
3914 /* mask has no trailing zeroes <==> ! right */
3915 else if (! right && mask2 == ~(lsb2 - 1))
3917 int late_right = exact_log2 (lsb2);
3918 best_cost = ashl_lshr_seq[left + late_right].insn_count
3919 + ashl_lshr_seq[late_right].insn_count;
3921 /* Try to use zero extend. */
3922 if (mask2 == ~(lsb2 - 1))
3924 int width, first;
3926 for (width = 8; width <= 16; width += 8)
3928 /* Can we zero-extend right away? */
3929 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3931 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3932 + ext_ashl_lshr_seq[left + right].insn_count;
3933 if (cost < best_cost)
3935 best = 1;
3936 best_cost = cost;
3937 best_right = right;
3938 best_len = cost;
3939 if (attrp)
3940 attrp[2] = -1;
3942 continue;
3944 /* ??? Could try to put zero extend into initial right shift,
3945 or even shift a bit left before the right shift. */
3946 /* Determine value of first part of left shift, to get to the
3947 zero extend cut-off point. */
3948 first = width - exact_log2 (lsb2) + right;
3949 if (first >= 0 && right + left - first >= 0)
3951 cost = ext_ashl_lshr_seq[right].insn_count
3952 + ext_ashl_lshr_seq[first].insn_count + 1
3953 + ext_ashl_lshr_seq[right + left - first].insn_count;
3955 if (cost < best_cost)
3957 best = 1;
3958 best_cost = cost;
3959 best_right = right;
3960 best_len = cost;
3961 if (attrp)
3962 attrp[2] = first;
3967 /* Try to use r0 AND pattern */
3968 for (i = 0; i <= 2; i++)
3970 if (i > right)
3971 break;
3972 if (! CONST_OK_FOR_K08 (mask >> i))
3973 continue;
3974 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
3975 if (cost < best_cost)
3977 best = 2;
3978 best_cost = cost;
3979 best_right = i;
3980 best_len = cost - 1;
3983 /* Try to use a scratch register to hold the AND operand. */
3984 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3985 for (i = 0; i <= 2; i++)
3987 if (i > right)
3988 break;
3989 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3990 + (can_ext
3991 ? ext_ashl_lshr_seq
3992 : ashl_lshr_seq)[left + i].insn_count;
3993 if (cost < best_cost)
3995 best = 4 - can_ext;
3996 best_cost = cost;
3997 best_right = i;
3998 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4002 if (attrp)
4004 attrp[0] = best_right;
4005 attrp[1] = best_len;
4007 return best;
4010 /* This is used in length attributes of the unnamed instructions
4011 corresponding to shl_and_kind return values of 1 and 2. */
4013 shl_and_length (rtx insn)
4015 rtx set_src, left_rtx, mask_rtx;
4016 int attributes[3];
4018 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4019 left_rtx = XEXP (XEXP (set_src, 0), 1);
4020 mask_rtx = XEXP (set_src, 1);
4021 shl_and_kind (left_rtx, mask_rtx, attributes);
4022 return attributes[1];
4025 /* This is used in length attribute of the and_shl_scratch instruction. */
4027 shl_and_scr_length (rtx insn)
4029 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4030 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4031 rtx op = XEXP (set_src, 0);
4032 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4033 op = XEXP (XEXP (op, 0), 0);
4034 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4037 /* Generate rtl for instructions for which shl_and_kind advised a particular
4038 method of generating them, i.e. returned zero. */
4039 bool
4040 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4042 int attributes[3];
4043 unsigned HOST_WIDE_INT mask;
4044 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4045 int right, total_shift;
4046 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4048 right = attributes[0];
4049 total_shift = INTVAL (left_rtx) + right;
4050 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4051 switch (kind)
4053 default:
4054 return true;
4055 case 1:
4057 int first = attributes[2];
4058 rtx operands[3];
4060 if (first < 0)
4062 emit_insn ((mask << right) <= 0xff
4063 ? gen_zero_extendqisi2 (dest,
4064 gen_lowpart (QImode, source))
4065 : gen_zero_extendhisi2 (dest,
4066 gen_lowpart (HImode, source)));
4067 source = dest;
4069 if (source != dest)
4070 emit_insn (gen_movsi (dest, source));
4071 operands[0] = dest;
4072 if (right)
4074 operands[2] = GEN_INT (right);
4075 gen_shifty_hi_op (LSHIFTRT, operands);
4077 if (first > 0)
4079 operands[2] = GEN_INT (first);
4080 gen_shifty_hi_op (ASHIFT, operands);
4081 total_shift -= first;
4082 mask <<= first;
4084 if (first >= 0)
4085 emit_insn (mask <= 0xff
4086 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4087 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4088 if (total_shift > 0)
4090 operands[2] = GEN_INT (total_shift);
4091 gen_shifty_hi_op (ASHIFT, operands);
4093 break;
4095 case 4:
4096 shift_gen_fun = gen_shifty_op;
4097 /* FALLTHRU */
4098 case 3:
4099 /* If the topmost bit that matters is set, set the topmost bits
4100 that don't matter. This way, we might be able to get a shorter
4101 signed constant. */
4102 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4103 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
4104 /* FALLTHRU */
4105 case 2:
4106 /* Don't expand fine-grained when combining, because that will
4107 make the pattern fail. */
4108 if (currently_expanding_to_rtl
4109 || reload_in_progress || reload_completed)
4111 rtx operands[3];
4113 /* Cases 3 and 4 should be handled by this split
4114 only while combining */
4115 gcc_assert (kind <= 2);
4116 if (right)
4118 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4119 source = dest;
4121 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4122 if (total_shift)
4124 operands[0] = dest;
4125 operands[1] = dest;
4126 operands[2] = GEN_INT (total_shift);
4127 shift_gen_fun (ASHIFT, operands);
4129 break;
4131 else
4133 int neg = 0;
4134 if (kind != 4 && total_shift < 16)
4136 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4137 if (neg > 0)
4138 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4139 else
4140 neg = 0;
4142 emit_insn (gen_and_shl_scratch (dest, source,
4143 GEN_INT (right),
4144 GEN_INT (mask),
4145 GEN_INT (total_shift + neg),
4146 GEN_INT (neg)));
4147 emit_insn (gen_movsi (dest, dest));
4148 break;
4151 return false;
4154 /* Try to find a good way to implement the combiner pattern
4155 [(set (match_operand:SI 0 "register_operand" "=r")
4156 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4157 (match_operand:SI 2 "const_int_operand" "n")
4158 (match_operand:SI 3 "const_int_operand" "n")
4159 (const_int 0)))
4160 (clobber (reg:SI T_REG))]
4161 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4162 return 0 for simple left / right shift combination.
4163 return 1 for left shift / 8 bit sign extend / left shift.
4164 return 2 for left shift / 16 bit sign extend / left shift.
4165 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4166 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4167 return 5 for left shift / 16 bit sign extend / right shift
4168 return 6 for < 8 bit sign extend / left shift.
4169 return 7 for < 8 bit sign extend / left shift / single right shift.
4170 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4172 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4174 int left, size, insize, ext;
4175 int cost = 0, best_cost;
4176 int kind;
4178 left = INTVAL (left_rtx);
4179 size = INTVAL (size_rtx);
4180 insize = size - left;
4181 gcc_assert (insize > 0);
4182 /* Default to left / right shift. */
4183 kind = 0;
4184 best_cost = ashl_lshr_seq[32 - insize].insn_count
4185 + ashl_lshr_seq[32 - size].insn_count;
4186 if (size <= 16)
4188 /* 16 bit shift / sign extend / 16 bit shift */
4189 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4190 + ashl_lshr_seq[16 - size].insn_count;
4191 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4192 below, by alternative 3 or something even better. */
4193 if (cost < best_cost)
4195 kind = 5;
4196 best_cost = cost;
4199 /* Try a plain sign extend between two shifts. */
4200 for (ext = 16; ext >= insize; ext -= 8)
4202 if (ext <= size)
4204 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4205 + ashl_lshr_seq[size - ext].insn_count;
4206 if (cost < best_cost)
4208 kind = ext / (unsigned) 8;
4209 best_cost = cost;
4212 /* Check if we can do a sloppy shift with a final signed shift
4213 restoring the sign. */
4214 if (EXT_SHIFT_SIGNED (size - ext))
4215 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4216 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4217 /* If not, maybe it's still cheaper to do the second shift sloppy,
4218 and do a final sign extend? */
4219 else if (size <= 16)
4220 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4221 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4222 + 1;
4223 else
4224 continue;
4225 if (cost < best_cost)
4227 kind = ext / (unsigned) 8 + 2;
4228 best_cost = cost;
4231 /* Check if we can sign extend in r0 */
4232 if (insize < 8)
4234 cost = 3 + ashl_lshr_seq[left].insn_count;
4235 if (cost < best_cost)
4237 kind = 6;
4238 best_cost = cost;
4240 /* Try the same with a final signed shift. */
4241 if (left < 31)
4243 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4244 if (cost < best_cost)
4246 kind = 7;
4247 best_cost = cost;
4251 if (TARGET_DYNSHIFT)
4253 /* Try to use a dynamic shift. */
4254 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4255 if (cost < best_cost)
4257 kind = 0;
4258 best_cost = cost;
4261 if (costp)
4262 *costp = cost;
4263 return kind;
4266 /* Function to be used in the length attribute of the instructions
4267 implementing this pattern. */
4269 shl_sext_length (rtx insn)
4271 rtx set_src, left_rtx, size_rtx;
4272 int cost;
4274 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4275 left_rtx = XEXP (XEXP (set_src, 0), 1);
4276 size_rtx = XEXP (set_src, 1);
4277 shl_sext_kind (left_rtx, size_rtx, &cost);
4278 return cost;
4281 /* Generate rtl for this pattern */
4282 bool
4283 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4285 int kind;
4286 int left, size, insize, cost;
4287 rtx operands[3];
4289 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4290 left = INTVAL (left_rtx);
4291 size = INTVAL (size_rtx);
4292 insize = size - left;
4293 switch (kind)
4295 case 1:
4296 case 2:
4297 case 3:
4298 case 4:
4300 int ext = kind & 1 ? 8 : 16;
4301 int shift2 = size - ext;
4303 /* Don't expand fine-grained when combining, because that will
4304 make the pattern fail. */
4305 if (! currently_expanding_to_rtl
4306 && ! reload_in_progress && ! reload_completed)
4308 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4309 emit_insn (gen_movsi (dest, source));
4310 break;
4312 if (dest != source)
4313 emit_insn (gen_movsi (dest, source));
4314 operands[0] = dest;
4315 if (ext - insize)
4317 operands[2] = GEN_INT (ext - insize);
4318 gen_shifty_hi_op (ASHIFT, operands);
4320 emit_insn (kind & 1
4321 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4322 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4323 if (kind <= 2)
4325 if (shift2)
4327 operands[2] = GEN_INT (shift2);
4328 gen_shifty_op (ASHIFT, operands);
4331 else
4333 if (shift2 > 0)
4335 if (EXT_SHIFT_SIGNED (shift2))
4337 operands[2] = GEN_INT (shift2 + 1);
4338 gen_shifty_op (ASHIFT, operands);
4339 operands[2] = const1_rtx;
4340 gen_shifty_op (ASHIFTRT, operands);
4341 break;
4343 operands[2] = GEN_INT (shift2);
4344 gen_shifty_hi_op (ASHIFT, operands);
4346 else if (shift2)
4348 operands[2] = GEN_INT (-shift2);
4349 gen_shifty_hi_op (LSHIFTRT, operands);
4351 emit_insn (size <= 8
4352 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4353 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4355 break;
4357 case 5:
4359 int i = 16 - size;
4360 if (! currently_expanding_to_rtl
4361 && ! reload_in_progress && ! reload_completed)
4362 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4363 else
4365 operands[0] = dest;
4366 operands[2] = GEN_INT (16 - insize);
4367 gen_shifty_hi_op (ASHIFT, operands);
4368 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4370 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4371 while (--i >= 0)
4372 gen_ashift (ASHIFTRT, 1, dest);
4373 break;
4375 case 6:
4376 case 7:
4377 /* Don't expand fine-grained when combining, because that will
4378 make the pattern fail. */
4379 if (! currently_expanding_to_rtl
4380 && ! reload_in_progress && ! reload_completed)
4382 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4383 emit_insn (gen_movsi (dest, source));
4384 break;
4386 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4387 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4388 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
4389 operands[0] = dest;
4390 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4391 gen_shifty_op (ASHIFT, operands);
4392 if (kind == 7)
4393 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4394 break;
4395 default:
4396 return true;
4398 return false;
4401 typedef struct label_ref_list_d
4403 rtx_code_label *label;
4404 struct label_ref_list_d *next;
4405 } *label_ref_list_t;
4407 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4408 ("label references list");
4410 /* The SH cannot load a large constant into a register, constants have to
4411 come from a pc relative load. The reference of a pc relative load
4412 instruction must be less than 1k in front of the instruction. This
4413 means that we often have to dump a constant inside a function, and
4414 generate code to branch around it.
4416 It is important to minimize this, since the branches will slow things
4417 down and make things bigger.
4419 Worst case code looks like:
4421 mov.l L1,rn
4422 bra L2
4424 align
4425 L1: .long value
4429 mov.l L3,rn
4430 bra L4
4432 align
4433 L3: .long value
4437 We fix this by performing a scan before scheduling, which notices which
4438 instructions need to have their operands fetched from the constant table
4439 and builds the table.
4441 The algorithm is:
4443 scan, find an instruction which needs a pcrel move. Look forward, find the
4444 last barrier which is within MAX_COUNT bytes of the requirement.
4445 If there isn't one, make one. Process all the instructions between
4446 the find and the barrier.
4448 In the above example, we can tell that L3 is within 1k of L1, so
4449 the first move can be shrunk from the 3 insn+constant sequence into
4450 just 1 insn, and the constant moved to L3 to make:
4452 mov.l L1,rn
4454 mov.l L3,rn
4455 bra L4
4457 align
4458 L3:.long value
4459 L4:.long value
4461 Then the second move becomes the target for the shortening process. */
4463 typedef struct
4465 rtx value; /* Value in table. */
4466 rtx_code_label *label; /* Label of value. */
4467 label_ref_list_t wend; /* End of window. */
4468 machine_mode mode; /* Mode of value. */
4470 /* True if this constant is accessed as part of a post-increment
4471 sequence. Note that HImode constants are never accessed in this way. */
4472 bool part_of_sequence_p;
4473 } pool_node;
4475 /* The maximum number of constants that can fit into one pool, since
4476 constants in the range 0..510 are at least 2 bytes long, and in the
4477 range from there to 1018 at least 4 bytes. */
4479 #define MAX_POOL_SIZE 372
4480 static pool_node pool_vector[MAX_POOL_SIZE];
4481 static int pool_size;
4482 static rtx_code_label *pool_window_label;
4483 static int pool_window_last;
4485 static int max_labelno_before_reorg;
4487 /* ??? If we need a constant in HImode which is the truncated value of a
4488 constant we need in SImode, we could combine the two entries thus saving
4489 two bytes. Is this common enough to be worth the effort of implementing
4490 it? */
4492 /* ??? This stuff should be done at the same time that we shorten branches.
4493 As it is now, we must assume that all branches are the maximum size, and
4494 this causes us to almost always output constant pools sooner than
4495 necessary. */
4497 /* Add a constant to the pool and return its label. */
4498 static rtx_code_label *
4499 add_constant (rtx x, machine_mode mode, rtx last_value)
4501 rtx_code_label *lab, *new_rtx;
4502 label_ref_list_t ref, newref;
4504 /* First see if we've already got it. */
4505 for (int i = 0; i < pool_size; i++)
4507 if (x->code == pool_vector[i].value->code
4508 && mode == pool_vector[i].mode)
4510 if (x->code == CODE_LABEL)
4512 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4513 continue;
4515 if (rtx_equal_p (x, pool_vector[i].value))
4517 lab = new_rtx = 0;
4518 if (! last_value
4519 || ! i
4520 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4522 new_rtx = gen_label_rtx ();
4523 LABEL_REFS (new_rtx) = pool_vector[i].label;
4524 pool_vector[i].label = lab = new_rtx;
4526 if (lab && pool_window_label)
4528 newref = label_ref_list_d_pool.allocate ();
4529 newref->label = pool_window_label;
4530 ref = pool_vector[pool_window_last].wend;
4531 newref->next = ref;
4532 pool_vector[pool_window_last].wend = newref;
4534 if (new_rtx)
4535 pool_window_label = new_rtx;
4536 pool_window_last = i;
4537 return lab;
4542 /* Need a new one. */
4543 pool_vector[pool_size].value = x;
4544 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4546 lab = 0;
4547 pool_vector[pool_size - 1].part_of_sequence_p = true;
4549 else
4550 lab = gen_label_rtx ();
4551 pool_vector[pool_size].mode = mode;
4552 pool_vector[pool_size].label = lab;
4553 pool_vector[pool_size].wend = NULL;
4554 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4555 if (lab && pool_window_label)
4557 newref = label_ref_list_d_pool.allocate ();
4558 newref->label = pool_window_label;
4559 ref = pool_vector[pool_window_last].wend;
4560 newref->next = ref;
4561 pool_vector[pool_window_last].wend = newref;
4563 if (lab)
4564 pool_window_label = lab;
4565 pool_window_last = pool_size;
4566 pool_size++;
4567 return lab;
4570 /* Output the literal table. START, if nonzero, is the first instruction
4571 this table is needed for, and also indicates that there is at least one
4572 casesi_worker_2 instruction; We have to emit the operand3 labels from
4573 these insns at a 4-byte aligned position. BARRIER is the barrier
4574 after which we are to place the table. */
4575 static void
4576 dump_table (rtx_insn *start, rtx_insn *barrier)
4578 rtx_insn *scan = barrier;
4579 bool need_align = true;
4580 rtx lab;
4581 label_ref_list_t ref;
4582 bool have_df = false;
4584 /* Do two passes, first time dump out the HI sized constants. */
4586 for (int i = 0; i < pool_size; i++)
4588 pool_node *p = &pool_vector[i];
4590 if (p->mode == HImode)
4592 if (need_align)
4594 scan = emit_insn_after (gen_align_2 (), scan);
4595 need_align = false;
4597 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4598 scan = emit_label_after (lab, scan);
4599 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4600 scan);
4601 for (ref = p->wend; ref; ref = ref->next)
4603 lab = ref->label;
4604 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4607 else if (p->mode == DFmode)
4608 have_df = true;
4611 need_align = true;
4613 if (start)
4615 scan = emit_insn_after (gen_align_4 (), scan);
4616 need_align = false;
4617 for (; start != barrier; start = NEXT_INSN (start))
4618 if (NONJUMP_INSN_P (start)
4619 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4621 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4622 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4624 scan = emit_label_after (lab, scan);
4627 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4629 rtx_insn *align_insn = NULL;
4631 scan = emit_label_after (gen_label_rtx (), scan);
4632 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4633 need_align = false;
4635 for (int i = 0; i < pool_size; i++)
4637 pool_node *p = &pool_vector[i];
4639 switch (p->mode)
4641 case E_HImode:
4642 break;
4643 case E_SImode:
4644 case E_SFmode:
4645 if (align_insn && !p->part_of_sequence_p)
4647 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4648 emit_label_before (lab, align_insn);
4649 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4650 align_insn);
4651 for (ref = p->wend; ref; ref = ref->next)
4653 lab = ref->label;
4654 emit_insn_before (gen_consttable_window_end (lab),
4655 align_insn);
4657 delete_insn (align_insn);
4658 align_insn = NULL;
4659 continue;
4661 else
4663 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4664 scan = emit_label_after (lab, scan);
4665 scan = emit_insn_after (gen_consttable_4 (p->value,
4666 const0_rtx), scan);
4667 need_align = ! need_align;
4669 break;
4670 case E_DFmode:
4671 if (need_align)
4673 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4674 align_insn = scan;
4675 need_align = false;
4677 /* FALLTHRU */
4678 case E_DImode:
4679 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4680 scan = emit_label_after (lab, scan);
4681 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4682 scan);
4683 break;
4684 default:
4685 gcc_unreachable ();
4688 if (p->mode != HImode)
4690 for (ref = p->wend; ref; ref = ref->next)
4692 lab = ref->label;
4693 scan = emit_insn_after (gen_consttable_window_end (lab),
4694 scan);
4699 pool_size = 0;
4702 for (int i = 0; i < pool_size; i++)
4704 pool_node *p = &pool_vector[i];
4706 switch (p->mode)
4708 case E_HImode:
4709 break;
4710 case E_SImode:
4711 case E_SFmode:
4712 if (need_align)
4714 need_align = false;
4715 scan = emit_label_after (gen_label_rtx (), scan);
4716 scan = emit_insn_after (gen_align_4 (), scan);
4718 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4719 scan = emit_label_after (lab, scan);
4720 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4721 scan);
4722 break;
4723 case E_DFmode:
4724 case E_DImode:
4725 if (need_align)
4727 need_align = false;
4728 scan = emit_label_after (gen_label_rtx (), scan);
4729 scan = emit_insn_after (gen_align_4 (), scan);
4731 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4732 scan = emit_label_after (lab, scan);
4733 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4734 scan);
4735 break;
4736 default:
4737 gcc_unreachable ();
4740 if (p->mode != HImode)
4742 for (ref = p->wend; ref; ref = ref->next)
4744 lab = ref->label;
4745 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4750 scan = emit_insn_after (gen_consttable_end (), scan);
4751 scan = emit_barrier_after (scan);
4752 pool_size = 0;
4753 pool_window_label = NULL;
4754 pool_window_last = 0;
4757 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4759 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4761 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
4762 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4763 need to fix it if the input value is CONST_OK_FOR_I08. */
4764 static bool
4765 broken_move (rtx_insn *insn)
4767 if (NONJUMP_INSN_P (insn))
4769 rtx pat = PATTERN (insn);
4770 if (GET_CODE (pat) == PARALLEL)
4771 pat = XVECEXP (pat, 0, 0);
4772 if (GET_CODE (pat) == SET
4773 /* We can load any 8-bit value if we don't care what the high
4774 order bits end up as. */
4775 && GET_MODE (SET_DEST (pat)) != QImode
4776 && (CONSTANT_P (SET_SRC (pat))
4777 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4778 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4779 /* Match mova_const. */
4780 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4781 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4782 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4783 && ! (TARGET_SH2E
4784 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4785 && (fp_zero_operand (SET_SRC (pat))
4786 || fp_one_operand (SET_SRC (pat)))
4787 /* In general we don't know the current setting of fpscr, so
4788 disable fldi.
4789 There is an exception if this was a register-register move
4790 before reload - and hence it was ascertained that we have
4791 single precision setting - and in a post-reload optimization
4792 we changed this to do a constant load. In that case
4793 we don't have an r0 clobber, hence we must use fldi. */
4794 && (TARGET_FMOVD
4795 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4796 == SCRATCH))
4797 && REG_P (SET_DEST (pat))
4798 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4799 && ! (TARGET_SH2A
4800 && GET_MODE (SET_DEST (pat)) == SImode
4801 && (satisfies_constraint_I20 (SET_SRC (pat))
4802 || satisfies_constraint_I28 (SET_SRC (pat))))
4803 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4804 return true;
4807 return false;
4810 /* Return true if the specified insn is a mova insn. */
4811 static bool
4812 mova_p (rtx_insn *insn)
4814 return (NONJUMP_INSN_P (insn)
4815 && GET_CODE (PATTERN (insn)) == SET
4816 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4817 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4818 /* Don't match mova_const. */
4819 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4822 /* Fix up a mova from a switch that went out of range. */
4823 static void
4824 fixup_mova (rtx_insn *mova)
4826 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4827 if (! flag_pic)
4829 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4830 INSN_CODE (mova) = -1;
4832 else
4834 rtx_insn *worker = mova;
4835 rtx_code_label *lab = gen_label_rtx ();
4836 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4840 worker = NEXT_INSN (worker);
4841 gcc_assert (worker
4842 && !LABEL_P (worker)
4843 && !JUMP_P (worker));
4844 } while (NOTE_P (worker)
4845 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4846 wpat = PATTERN (worker);
4847 wpat0 = XVECEXP (wpat, 0, 0);
4848 wpat1 = XVECEXP (wpat, 0, 1);
4849 wsrc = SET_SRC (wpat0);
4850 PATTERN (worker) = (gen_casesi_worker_2
4851 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4852 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4853 XEXP (wpat1, 0)));
4854 INSN_CODE (worker) = -1;
4855 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4856 base = gen_rtx_LABEL_REF (Pmode, lab);
4857 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4858 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4859 INSN_CODE (mova) = -1;
4863 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4864 *num_mova, and check if the new mova is not nested within the first one.
4865 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4866 2 if new_mova has been assigned to *first_mova, -1 otherwise.. */
4867 static int
4868 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4870 int n_addr = 0; /* Initialization to shut up spurious warning. */
4871 int f_target, n_target = 0; /* Likewise. */
4873 if (optimize)
4875 /* If NEW_MOVA has no address yet, it will be handled later. */
4876 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4877 return -1;
4879 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4880 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4881 if (n_addr > n_target || n_addr + 1022 < n_target)
4883 /* Change the mova into a load.
4884 broken_move will then return true for it. */
4885 fixup_mova (new_mova);
4886 return 1;
4889 if (!(*num_mova)++)
4891 *first_mova = new_mova;
4892 return 2;
4894 if (!optimize
4895 || ((f_target
4896 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4897 >= n_target))
4898 return -1;
4900 (*num_mova)--;
4901 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4902 > n_target - n_addr)
4904 fixup_mova (*first_mova);
4905 return 0;
4907 else
4909 fixup_mova (new_mova);
4910 return 1;
4914 /* Find the last barrier from insn FROM which is close enough to hold the
4915 constant pool. If we can't find one, then create one near the end of
4916 the range. */
4917 static rtx_insn *
4918 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4920 int count_si = 0;
4921 int count_hi = 0;
4922 int found_hi = 0;
4923 int found_si = 0;
4924 int hi_align = 2;
4925 int si_align = 2;
4926 int leading_mova = num_mova;
4927 rtx_insn *barrier_before_mova = NULL;
4928 rtx_insn *found_barrier = NULL;
4929 rtx_insn *good_barrier = NULL;
4930 int si_limit;
4931 int hi_limit;
4932 rtx_insn *orig = from;
4933 rtx_insn *last_got = NULL;
4934 rtx_insn *last_symoff = NULL;
4936 /* For HImode: range is 510, add 4 because pc counts from address of
4937 second instruction after this one, subtract 2 for the jump instruction
4938 that we may need to emit before the table, subtract 2 for the instruction
4939 that fills the jump delay slot (in very rare cases, reorg will take an
4940 instruction from after the constant pool or will leave the delay slot
4941 empty). This gives 510.
4942 For SImode: range is 1020, add 4 because pc counts from address of
4943 second instruction after this one, subtract 2 in case pc is 2 byte
4944 aligned, subtract 2 for the jump instruction that we may need to emit
4945 before the table, subtract 2 for the instruction that fills the jump
4946 delay slot. This gives 1018. */
4948 /* The branch will always be shortened now that the reference address for
4949 forward branches is the successor address, thus we need no longer make
4950 adjustments to the [sh]i_limit for -O0. */
4952 si_limit = 1018;
4953 hi_limit = 510;
4955 while (from && count_si < si_limit && count_hi < hi_limit)
4957 int inc = get_attr_length (from);
4958 int new_align = 1;
4960 /* If this is a label that existed at the time of the compute_alignments
4961 call, determine the alignment. N.B. When find_barrier recurses for
4962 an out-of-reach mova, we might see labels at the start of previously
4963 inserted constant tables. */
4964 if (LABEL_P (from)
4965 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4967 if (optimize)
4968 new_align = 1 << label_to_alignment (from);
4969 else if (BARRIER_P (prev_nonnote_insn (from)))
4970 new_align = 1 << barrier_align (from);
4971 else
4972 new_align = 1;
4973 inc = 0;
4975 /* In case we are scanning a constant table because of recursion, check
4976 for explicit alignments. If the table is long, we might be forced
4977 to emit the new table in front of it; the length of the alignment
4978 might be the last straw. */
4979 else if (NONJUMP_INSN_P (from)
4980 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4981 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4982 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4983 /* When we find the end of a constant table, paste the new constant
4984 at the end. That is better than putting it in front because
4985 this way, we don't need extra alignment for adding a 4-byte-aligned
4986 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4987 else if (NONJUMP_INSN_P (from)
4988 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4989 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4990 return from;
4992 if (BARRIER_P (from))
4994 rtx_insn *next;
4996 found_barrier = from;
4998 /* If we are at the end of the function, or in front of an alignment
4999 instruction, we need not insert an extra alignment. We prefer
5000 this kind of barrier. */
5001 if (barrier_align (from) > 2)
5002 good_barrier = from;
5004 /* If we are at the end of a hot/cold block, dump the constants
5005 here. */
5006 next = NEXT_INSN (from);
5007 if (next
5008 && NOTE_P (next)
5009 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5010 break;
5013 if (broken_move (from))
5015 rtx pat, src, dst;
5016 machine_mode mode;
5018 pat = PATTERN (from);
5019 if (GET_CODE (pat) == PARALLEL)
5020 pat = XVECEXP (pat, 0, 0);
5021 src = SET_SRC (pat);
5022 dst = SET_DEST (pat);
5023 mode = GET_MODE (dst);
5025 /* GOT pcrelat setting comes in pair of
5026 mova .L8,r0
5027 mov.l .L8,r12
5028 instructions. (plus add r0,r12).
5029 Remember if we see one without the other. */
5030 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5031 last_got = last_got ? NULL : from;
5032 else if (PIC_ADDR_P (src))
5033 last_got = last_got ? NULL : from;
5035 /* We must explicitly check the mode, because sometimes the
5036 front end will generate code to load unsigned constants into
5037 HImode targets without properly sign extending them. */
5038 if (mode == HImode
5039 || (mode == SImode && satisfies_constraint_I16 (src)
5040 && REGNO (dst) != FPUL_REG))
5042 found_hi += 2;
5043 /* We put the short constants before the long constants, so
5044 we must count the length of short constants in the range
5045 for the long constants. */
5046 /* ??? This isn't optimal, but is easy to do. */
5047 si_limit -= 2;
5049 else
5051 /* We dump DF/DI constants before SF/SI ones, because
5052 the limit is the same, but the alignment requirements
5053 are higher. We may waste up to 4 additional bytes
5054 for alignment, and the DF/DI constant may have
5055 another SF/SI constant placed before it. */
5056 while (si_align > 2 && found_si + si_align - 2 > count_si)
5057 si_align >>= 1;
5058 if (found_si > count_si)
5059 count_si = found_si;
5060 found_si += GET_MODE_SIZE (mode);
5061 if (num_mova)
5062 si_limit -= GET_MODE_SIZE (mode);
5066 if (mova_p (from))
5068 switch (untangle_mova (&num_mova, &mova, from))
5070 case 1:
5071 if (flag_pic)
5073 rtx src = SET_SRC (PATTERN (from));
5074 if (GET_CODE (src) == CONST
5075 && GET_CODE (XEXP (src, 0)) == UNSPEC
5076 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5077 last_symoff = from;
5079 break;
5080 case 0: return find_barrier (0, 0, mova);
5081 case 2:
5083 leading_mova = 0;
5084 barrier_before_mova
5085 = good_barrier ? good_barrier : found_barrier;
5087 default: break;
5089 if (found_si > count_si)
5090 count_si = found_si;
5092 else if (JUMP_TABLE_DATA_P (from)
5093 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5095 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5096 || (num_mova
5097 && (prev_nonnote_insn (from)
5098 == XEXP (MOVA_LABELREF (mova), 0))))
5099 num_mova--;
5100 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5102 /* We have just passed the barrier in front of the
5103 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5104 the ADDR_DIFF_VEC is accessed as data, just like our pool
5105 constants, this is a good opportunity to accommodate what
5106 we have gathered so far.
5107 If we waited any longer, we could end up at a barrier in
5108 front of code, which gives worse cache usage for separated
5109 instruction / data caches. */
5110 good_barrier = found_barrier;
5111 break;
5113 else
5115 rtx body = PATTERN (from);
5116 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5119 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5120 else if (JUMP_P (from)
5121 && ! TARGET_SH2
5122 && ! optimize_size)
5123 new_align = 4;
5125 /* There is a possibility that a bf is transformed into a bf/s by the
5126 delay slot scheduler. */
5127 if (JUMP_P (from)
5128 && get_attr_type (from) == TYPE_CBRANCH
5129 && ! sequence_insn_p (from))
5130 inc += 2;
5132 if (found_si)
5134 count_si += inc;
5135 if (new_align > si_align)
5137 si_limit -= (count_si - 1) & (new_align - si_align);
5138 si_align = new_align;
5140 count_si = (count_si + new_align - 1) & -new_align;
5142 if (found_hi)
5144 count_hi += inc;
5145 if (new_align > hi_align)
5147 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5148 hi_align = new_align;
5150 count_hi = (count_hi + new_align - 1) & -new_align;
5152 from = NEXT_INSN (from);
5155 if (num_mova)
5157 if (leading_mova)
5159 /* Try as we might, the leading mova is out of range. Change
5160 it into a load (which will become a pcload) and retry. */
5161 fixup_mova (mova);
5162 return find_barrier (0, 0, mova);
5164 else
5166 /* Insert the constant pool table before the mova instruction,
5167 to prevent the mova label reference from going out of range. */
5168 from = mova;
5169 good_barrier = found_barrier = barrier_before_mova;
5173 if (found_barrier)
5175 if (good_barrier && next_real_insn (found_barrier))
5176 found_barrier = good_barrier;
5178 else
5180 /* We didn't find a barrier in time to dump our stuff,
5181 so we'll make one. */
5182 rtx_code_label *label = gen_label_rtx ();
5184 /* Don't emit a constant table in the middle of insns for
5185 casesi_worker_2. This is a bit overkill but is enough
5186 because casesi_worker_2 wouldn't appear so frequently. */
5187 if (last_symoff)
5188 from = last_symoff;
5190 /* If we exceeded the range, then we must back up over the last
5191 instruction we looked at. Otherwise, we just need to undo the
5192 NEXT_INSN at the end of the loop. */
5193 if (PREV_INSN (from) != orig
5194 && (count_hi > hi_limit || count_si > si_limit))
5195 from = PREV_INSN (PREV_INSN (from));
5196 else
5197 from = PREV_INSN (from);
5199 /* Don't emit a constant table int the middle of global pointer setting,
5200 since that that would move the addressing base GOT into another table.
5201 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5202 in the pool anyway, so just move up the whole constant pool.
5204 However, avoid doing so when the last single GOT mov is the starting
5205 insn itself. Going past above the start insn would create a negative
5206 offset, causing errors. */
5207 if (last_got && last_got != orig)
5208 from = PREV_INSN (last_got);
5210 /* Don't insert the constant pool table at the position which
5211 may be the landing pad. */
5212 if (flag_exceptions
5213 && CALL_P (from)
5214 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5215 from = PREV_INSN (from);
5217 /* Walk back to be just before any jump or label.
5218 Putting it before a label reduces the number of times the branch
5219 around the constant pool table will be hit. Putting it before
5220 a jump makes it more likely that the bra delay slot will be
5221 filled. */
5222 while (NOTE_P (from) || JUMP_P (from)
5223 || LABEL_P (from))
5224 from = PREV_INSN (from);
5226 /* Make sure we do not split between a call and its corresponding
5227 CALL_ARG_LOCATION note. */
5228 if (CALL_P (from))
5230 rtx_insn *next = NEXT_INSN (from);
5231 if (next && NOTE_P (next)
5232 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5233 from = next;
5236 from = emit_jump_insn_after (gen_jump (label), from);
5237 JUMP_LABEL (from) = label;
5238 LABEL_NUSES (label) = 1;
5239 found_barrier = emit_barrier_after (from);
5240 emit_label_after (label, found_barrier);
5243 return found_barrier;
5246 /* If the instruction INSN is implemented by a special function, and we can
5247 positively find the register that is used to call the sfunc, and this
5248 register is not used anywhere else in this instruction - except as the
5249 destination of a set, return this register; else, return 0. */
5251 sfunc_uses_reg (rtx_insn *insn)
5253 int i;
5254 rtx pattern, part, reg_part, reg;
5256 if (!NONJUMP_INSN_P (insn))
5257 return NULL_RTX;
5258 pattern = PATTERN (insn);
5259 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5260 return NULL_RTX;
5262 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5264 part = XVECEXP (pattern, 0, i);
5265 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5266 reg_part = part;
5268 if (! reg_part)
5269 return NULL_RTX;
5270 reg = XEXP (reg_part, 0);
5271 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5273 part = XVECEXP (pattern, 0, i);
5274 if (part == reg_part || GET_CODE (part) == CLOBBER)
5275 continue;
5276 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5277 && REG_P (SET_DEST (part)))
5278 ? SET_SRC (part) : part)))
5279 return NULL_RTX;
5281 return reg;
5284 /* See if the only way in which INSN uses REG is by calling it, or by
5285 setting it while calling it. Set *SET to a SET rtx if the register
5286 is set by INSN. */
5287 static bool
5288 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5290 *set = NULL_RTX;
5292 rtx reg2 = sfunc_uses_reg (insn);
5293 if (reg2 && REGNO (reg2) == REGNO (reg))
5295 rtx pattern = single_set (insn);
5296 if (pattern
5297 && REG_P (SET_DEST (pattern))
5298 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5299 *set = pattern;
5300 return false;
5302 if (!CALL_P (insn))
5304 /* We don't use rtx_equal_p because we don't care if the mode is
5305 different. */
5306 rtx pattern = single_set (insn);
5307 if (pattern
5308 && REG_P (SET_DEST (pattern))
5309 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5311 rtx par, part;
5312 int i;
5314 *set = pattern;
5315 par = PATTERN (insn);
5316 if (GET_CODE (par) == PARALLEL)
5317 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5319 part = XVECEXP (par, 0, i);
5320 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5321 return true;
5323 return reg_mentioned_p (reg, SET_SRC (pattern));
5326 return true;
5329 rtx pattern = PATTERN (insn);
5331 if (GET_CODE (pattern) == PARALLEL)
5333 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5334 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5335 return true;
5336 pattern = XVECEXP (pattern, 0, 0);
5339 if (GET_CODE (pattern) == SET)
5341 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5343 /* We don't use rtx_equal_p, because we don't care if the
5344 mode is different. */
5345 if (!REG_P (SET_DEST (pattern))
5346 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5347 return true;
5349 *set = pattern;
5352 pattern = SET_SRC (pattern);
5355 if (GET_CODE (pattern) != CALL
5356 || !MEM_P (XEXP (pattern, 0))
5357 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5358 return true;
5360 return false;
5363 /* Given a X, a pattern of an insn or a part of it, return a mask of used
5364 general registers. Bits 0..15 mean that the respective registers
5365 are used as inputs in the instruction. Bits 16..31 mean that the
5366 registers 0..15, respectively, are used as outputs, or are clobbered.
5367 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5369 regs_used (rtx x, int is_dest)
5371 enum rtx_code code;
5372 const char *fmt;
5373 int used = 0;
5375 if (! x)
5376 return used;
5377 code = GET_CODE (x);
5378 switch (code)
5380 case REG:
5381 if (REGNO (x) < 16)
5382 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5383 << (REGNO (x) + is_dest));
5384 return 0;
5385 case SUBREG:
5387 rtx y = SUBREG_REG (x);
5389 if (!REG_P (y))
5390 break;
5391 if (REGNO (y) < 16)
5392 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5393 << (REGNO (y) +
5394 subreg_regno_offset (REGNO (y),
5395 GET_MODE (y),
5396 SUBREG_BYTE (x),
5397 GET_MODE (x)) + is_dest));
5398 return 0;
5400 case SET:
5401 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5402 case RETURN:
5403 /* If there was a return value, it must have been indicated with USE. */
5404 return 0x00ffff00;
5405 case CLOBBER:
5406 is_dest = 1;
5407 break;
5408 case MEM:
5409 is_dest = 0;
5410 break;
5411 case CALL:
5412 used |= 0x00ff00f0;
5413 break;
5414 default:
5415 break;
5418 fmt = GET_RTX_FORMAT (code);
5420 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5422 if (fmt[i] == 'E')
5424 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5425 used |= regs_used (XVECEXP (x, i, j), is_dest);
5427 else if (fmt[i] == 'e')
5428 used |= regs_used (XEXP (x, i), is_dest);
5430 return used;
5433 /* Create an instruction that prevents redirection of a conditional branch
5434 to the destination of the JUMP with address ADDR.
5435 If the branch needs to be implemented as an indirect jump, try to find
5436 a scratch register for it.
5437 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5438 If any preceding insn that doesn't fit into a delay slot is good enough,
5439 pass 1. Pass 2 if a definite blocking insn is needed.
5440 -1 is used internally to avoid deep recursion.
5441 If a blocking instruction is made or recognized, return it. */
5442 static rtx_insn *
5443 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5445 int dead = 0;
5446 rtx_insn *prev = prev_nonnote_insn (jump);
5448 /* First, check if we already have an instruction that satisfies our need. */
5449 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5451 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5452 return prev;
5453 if (GET_CODE (PATTERN (prev)) == USE
5454 || GET_CODE (PATTERN (prev)) == CLOBBER
5455 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5456 prev = jump;
5457 else if ((need_block &= ~1) < 0)
5458 return prev;
5459 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5460 need_block = 0;
5462 if (GET_CODE (PATTERN (jump)) == RETURN)
5464 if (! need_block)
5465 return prev;
5466 /* Reorg even does nasty things with return insns that cause branches
5467 to go out of range - see find_end_label and callers. */
5468 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5470 /* We can't use JUMP_LABEL here because it might be undefined
5471 when not optimizing. */
5472 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5473 /* If the branch is out of range, try to find a scratch register for it. */
5474 if (optimize
5475 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5476 > 4092 + 4098))
5478 rtx_insn *scan;
5479 /* Don't look for the stack pointer as a scratch register,
5480 it would cause trouble if an interrupt occurred. */
5481 unsigned attempt = 0x7fff, used;
5482 int jump_left = flag_expensive_optimizations + 1;
5484 /* It is likely that the most recent eligible instruction is wanted for
5485 the delay slot. Therefore, find out which registers it uses, and
5486 try to avoid using them. */
5488 for (scan = jump; (scan = PREV_INSN (scan)); )
5490 if (scan->deleted ())
5491 continue;
5492 rtx_code code = GET_CODE (scan);
5493 if (code == CODE_LABEL || code == JUMP_INSN)
5494 break;
5495 if (code == INSN
5496 && GET_CODE (PATTERN (scan)) != USE
5497 && GET_CODE (PATTERN (scan)) != CLOBBER
5498 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5500 attempt &= ~regs_used (PATTERN (scan), 0);
5501 break;
5504 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5505 (scan = NEXT_INSN (scan)); )
5507 if (scan->deleted ())
5508 continue;
5509 rtx_code code = GET_CODE (scan);
5510 if (INSN_P (scan))
5512 used |= regs_used (PATTERN (scan), 0);
5513 if (code == CALL_INSN)
5514 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5515 dead |= (used >> 16) & ~used;
5516 if (dead & attempt)
5518 dead &= attempt;
5519 break;
5521 if (code == JUMP_INSN)
5523 if (jump_left-- && simplejump_p (scan))
5524 scan = JUMP_LABEL_AS_INSN (scan);
5525 else
5526 break;
5530 /* Mask out the stack pointer again, in case it was
5531 the only 'free' register we have found. */
5532 dead &= 0x7fff;
5534 /* If the immediate destination is still in range, check for possible
5535 threading with a jump beyond the delay slot insn.
5536 Don't check if we are called recursively; the jump has been or will be
5537 checked in a different invocation then. */
5539 else if (optimize && need_block >= 0)
5541 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
5542 next = next_active_insn (next);
5543 if (next && JUMP_P (next)
5544 && GET_CODE (PATTERN (next)) == SET
5545 && recog_memoized (next) == CODE_FOR_jump_compact)
5547 dest = JUMP_LABEL (next);
5548 if (dest
5549 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5550 > 4092 + 4098))
5551 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5555 if (dead)
5557 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5559 /* It would be nice if we could convert the jump into an indirect
5560 jump / far branch right now, and thus exposing all constituent
5561 instructions to further optimization. However, reorg uses
5562 simplejump_p to determine if there is an unconditional jump where
5563 it should try to schedule instructions from the target of the
5564 branch; simplejump_p fails for indirect jumps even if they have
5565 a JUMP_LABEL. */
5566 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5567 (reg, GEN_INT (unspec_bbr_uid++)),
5568 jump);
5569 /* ??? We would like this to have the scope of the jump, but that
5570 scope will change when a delay slot insn of an inner scope is added.
5571 Hence, after delay slot scheduling, we'll have to expect
5572 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5573 the jump. */
5575 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5576 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5577 return insn;
5579 else if (need_block)
5580 /* We can't use JUMP_LABEL here because it might be undefined
5581 when not optimizing. */
5582 return emit_insn_before (gen_block_branch_redirect
5583 (GEN_INT (unspec_bbr_uid++)),
5584 jump);
5585 return prev;
5588 #define CONDJUMP_MIN -252
5589 #define CONDJUMP_MAX 262
5590 struct far_branch
5592 /* A label (to be placed) in front of the jump
5593 that jumps to our ultimate destination. */
5594 rtx_insn *near_label;
5595 /* Where we are going to insert it if we cannot move the jump any farther,
5596 or the jump itself if we have picked up an existing jump. */
5597 rtx_insn *insert_place;
5598 /* The ultimate destination. */
5599 rtx_insn *far_label;
5600 struct far_branch *prev;
5601 /* If the branch has already been created, its address;
5602 else the address of its first prospective user. */
5603 int address;
5606 enum mdep_reorg_phase_e mdep_reorg_phase;
5608 static void
5609 gen_far_branch (struct far_branch *bp)
5611 rtx_insn *insn = bp->insert_place;
5612 rtx_jump_insn *jump;
5613 rtx_code_label *label = gen_label_rtx ();
5615 emit_label_after (label, insn);
5616 if (bp->far_label)
5618 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5619 LABEL_NUSES (bp->far_label)++;
5621 else
5622 jump = emit_jump_insn_after (gen_return (), insn);
5624 /* Emit a barrier so that reorg knows that any following instructions
5625 are not reachable via a fall-through path.
5626 But don't do this when not optimizing, since we wouldn't suppress the
5627 alignment for the barrier then, and could end up with out-of-range
5628 pc-relative loads. */
5629 if (optimize)
5630 emit_barrier_after (jump);
5631 emit_label_after (bp->near_label, insn);
5633 if (bp->far_label)
5634 JUMP_LABEL (jump) = bp->far_label;
5635 else
5637 rtx pat = PATTERN (jump);
5638 gcc_assert (ANY_RETURN_P (pat));
5639 JUMP_LABEL (jump) = pat;
5642 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5643 gcc_assert (ok);
5645 /* If we are branching around a jump (rather than a return), prevent
5646 reorg from using an insn from the jump target as the delay slot insn -
5647 when reorg did this, it pessimized code (we rather hide the delay slot)
5648 and it could cause branches to go out of range. */
5649 if (bp->far_label)
5650 (emit_insn_after
5651 (gen_stuff_delay_slot
5652 (GEN_INT (unspec_bbr_uid++),
5653 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5654 insn));
5655 /* Prevent reorg from undoing our splits. */
5656 gen_block_redirect (jump, bp->address += 2, 2);
5659 /* Fix up ADDR_DIFF_VECs. */
5660 void
5661 fixup_addr_diff_vecs (rtx_insn *first)
5663 rtx_insn *insn;
5665 for (insn = first; insn; insn = NEXT_INSN (insn))
5667 rtx vec_lab, pat, prevpat, x, braf_label;
5668 rtx_insn *prev;
5670 if (! JUMP_TABLE_DATA_P (insn)
5671 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5672 continue;
5673 pat = PATTERN (insn);
5674 vec_lab = XEXP (XEXP (pat, 0), 0);
5676 /* Search the matching casesi_jump_2. */
5677 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5679 if (!JUMP_P (prev))
5680 continue;
5681 prevpat = PATTERN (prev);
5682 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5683 continue;
5684 x = XVECEXP (prevpat, 0, 1);
5685 if (GET_CODE (x) != USE)
5686 continue;
5687 x = XEXP (x, 0);
5688 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5689 break;
5691 /* FIXME: This is a bug in the optimizer, but it seems harmless
5692 to just avoid panicing. */
5693 if (!prev)
5694 continue;
5696 /* Emit the reference label of the braf where it belongs, right after
5697 the casesi_jump_2 (i.e. braf). */
5698 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5699 emit_label_after (braf_label, prev);
5701 /* Fix up the ADDR_DIF_VEC to be relative
5702 to the reference address of the braf. */
5703 XEXP (XEXP (pat, 0), 0) = braf_label;
5707 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5708 a barrier. Return the base 2 logarithm of the desired alignment. */
5710 barrier_align (rtx_insn *barrier_or_label)
5712 if (! barrier_or_label)
5713 return 0;
5715 if (LABEL_P (barrier_or_label)
5716 && NEXT_INSN (barrier_or_label)
5717 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5718 return 2;
5720 if (BARRIER_P (barrier_or_label)
5721 && PREV_INSN (barrier_or_label)
5722 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5724 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5725 /* If this is a very small table, we want to keep the alignment after
5726 the table to the minimum for proper code alignment. */
5727 return ((optimize_size
5728 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5729 <= (unsigned) 1 << (CACHE_LOG - 2)))
5730 ? 1 : align_jumps_log);
5733 rtx_insn *next = next_active_insn (barrier_or_label);
5735 if (! next)
5736 return 0;
5738 rtx pat = PATTERN (next);
5740 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5741 /* This is a barrier in front of a constant table. */
5742 return 0;
5744 if (optimize_size)
5745 return 0;
5747 if (! TARGET_SH2 || ! optimize)
5748 return align_jumps_log;
5750 /* When fixing up pcloads, a constant table might be inserted just before
5751 the basic block that ends with the barrier. Thus, we can't trust the
5752 instruction lengths before that. */
5753 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5755 /* Check if there is an immediately preceding branch to the insn beyond
5756 the barrier. We must weight the cost of discarding useful information
5757 from the current cache line when executing this branch and there is
5758 an alignment, against that of fetching unneeded insn in front of the
5759 branch target when there is no alignment. */
5761 /* There are two delay_slot cases to consider. One is the simple case
5762 where the preceding branch is to the insn beyond the barrier (simple
5763 delay slot filling), and the other is where the preceding branch has
5764 a delay slot that is a duplicate of the insn after the barrier
5765 (fill_eager_delay_slots) and the branch is to the insn after the insn
5766 after the barrier. */
5768 int slot, credit;
5769 bool jump_to_next = false;
5771 /* Skip to the insn before the JUMP_INSN before the barrier under
5772 investigation. */
5773 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5775 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5776 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5777 prev = prev_real_insn (prev))
5779 jump_to_next = false;
5780 if (GET_CODE (PATTERN (prev)) == USE
5781 || GET_CODE (PATTERN (prev)) == CLOBBER)
5782 continue;
5783 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5785 prev = prev_seq->insn (1);
5786 if (INSN_UID (prev) == INSN_UID (next))
5788 /* Delay slot was filled with insn at jump target. */
5789 jump_to_next = true;
5790 continue;
5794 if (slot &&
5795 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5796 slot = 0;
5797 credit -= get_attr_length (prev);
5799 if (prev && jump_to_label_p (prev))
5801 rtx_insn *x;
5802 if (jump_to_next
5803 || next_real_insn (JUMP_LABEL (prev)) == next
5804 /* If relax_delay_slots() decides NEXT was redundant
5805 with some previous instruction, it will have
5806 redirected PREV's jump to the following insn. */
5807 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5808 /* There is no upper bound on redundant instructions
5809 that might have been skipped, but we must not put an
5810 alignment where none had been before. */
5811 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5812 (INSN_P (x)
5813 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5814 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5815 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5817 rtx pat = PATTERN (prev);
5818 if (GET_CODE (pat) == PARALLEL)
5819 pat = XVECEXP (pat, 0, 0);
5820 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5821 return 0;
5826 return align_jumps_log;
5829 /* If we are inside a phony loop, almost any kind of label can turn up as the
5830 first one in the loop. Aligning a braf label causes incorrect switch
5831 destination addresses; we can detect braf labels because they are
5832 followed by a BARRIER.
5833 Applying loop alignment to small constant or switch tables is a waste
5834 of space, so we suppress this too. */
5836 sh_loop_align (rtx_insn *label)
5838 rtx_insn *next = label;
5840 if (! optimize || optimize_size)
5841 return 0;
5844 next = next_nonnote_insn (next);
5845 while (next && LABEL_P (next));
5847 if (! next
5848 || ! INSN_P (next)
5849 || recog_memoized (next) == CODE_FOR_consttable_2)
5850 return 0;
5852 return align_loops_log;
5855 /* Do a final pass over the function, just before delayed branch
5856 scheduling. */
5857 static void
5858 sh_reorg (void)
5860 rtx_insn *first, *insn, *mova = NULL;
5861 int num_mova;
5862 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5863 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5865 first = get_insns ();
5866 max_labelno_before_reorg = max_label_num ();
5868 /* We must split call insns before introducing `mova's. If we're
5869 optimizing, they'll have already been split. Otherwise, make
5870 sure we don't split them too late. */
5871 if (! optimize)
5872 split_all_insns_noflow ();
5874 /* If relaxing, generate pseudo-ops to associate function calls with
5875 the symbols they call. It does no harm to not generate these
5876 pseudo-ops. However, when we can generate them, it enables the
5877 linker to potentially relax the jsr to a bsr, and eliminate the
5878 register load and, possibly, the constant pool entry. */
5880 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5881 if (TARGET_RELAX)
5883 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5884 own purposes. This works because none of the remaining passes
5885 need to look at them.
5887 ??? But it may break in the future. We should use a machine
5888 dependent REG_NOTE, or some other approach entirely. */
5889 for (insn = first; insn; insn = NEXT_INSN (insn))
5891 if (INSN_P (insn))
5893 rtx note;
5895 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5896 NULL_RTX)) != 0)
5897 remove_note (insn, note);
5901 for (insn = first; insn; insn = NEXT_INSN (insn))
5903 rtx pattern, reg, set, dies;
5904 rtx_code_label *label;
5905 rtx_insn *link, *scan;
5906 int rescan = 0, foundinsn = 0;
5908 if (CALL_P (insn))
5910 pattern = PATTERN (insn);
5912 if (GET_CODE (pattern) == PARALLEL)
5913 pattern = XVECEXP (pattern, 0, 0);
5914 if (GET_CODE (pattern) == SET)
5915 pattern = SET_SRC (pattern);
5917 if (GET_CODE (pattern) != CALL
5918 || !MEM_P (XEXP (pattern, 0)))
5919 continue;
5921 reg = XEXP (XEXP (pattern, 0), 0);
5923 else
5925 reg = sfunc_uses_reg (insn);
5926 if (! reg)
5927 continue;
5930 if (!REG_P (reg))
5931 continue;
5933 /* Try scanning backward to find where the register is set. */
5934 link = NULL;
5935 for (scan = PREV_INSN (insn);
5936 scan && !LABEL_P (scan);
5937 scan = PREV_INSN (scan))
5939 if (! INSN_P (scan))
5940 continue;
5942 if (! reg_mentioned_p (reg, scan))
5943 continue;
5945 if (noncall_uses_reg (reg, scan, &set))
5946 break;
5948 if (set)
5950 link = scan;
5951 break;
5955 if (! link)
5956 continue;
5958 /* The register is set at LINK. */
5960 /* We can only optimize the function call if the register is
5961 being set to a symbol. In theory, we could sometimes
5962 optimize calls to a constant location, but the assembler
5963 and linker do not support that at present. */
5964 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5965 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5966 continue;
5968 /* Scan forward from LINK to the place where REG dies, and
5969 make sure that the only insns which use REG are
5970 themselves function calls. */
5972 /* ??? This doesn't work for call targets that were allocated
5973 by reload, since there may not be a REG_DEAD note for the
5974 register. */
5976 dies = NULL_RTX;
5977 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5979 rtx scanset;
5981 /* Don't try to trace forward past a CODE_LABEL if we haven't
5982 seen INSN yet. Ordinarily, we will only find the setting insn
5983 if it is in the same basic block. However,
5984 cross-jumping can insert code labels in between the load and
5985 the call, and can result in situations where a single call
5986 insn may have two targets depending on where we came from. */
5988 if (LABEL_P (scan) && ! foundinsn)
5989 break;
5991 if (! INSN_P (scan))
5992 continue;
5994 /* Don't try to trace forward past a JUMP. To optimize
5995 safely, we would have to check that all the
5996 instructions at the jump destination did not use REG. */
5998 if (JUMP_P (scan))
5999 break;
6001 if (! reg_mentioned_p (reg, scan))
6002 continue;
6004 if (noncall_uses_reg (reg, scan, &scanset))
6005 break;
6007 if (scan == insn)
6008 foundinsn = 1;
6010 if (scan != insn
6011 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6013 /* There is a function call to this register other
6014 than the one we are checking. If we optimize
6015 this call, we need to rescan again below. */
6016 rescan = 1;
6019 /* ??? We shouldn't have to worry about SCANSET here.
6020 We should just be able to check for a REG_DEAD note
6021 on a function call. However, the REG_DEAD notes are
6022 apparently not dependable around libcalls; c-torture
6023 execute/920501-2 is a test case. If SCANSET is set,
6024 then this insn sets the register, so it must have
6025 died earlier. Unfortunately, this will only handle
6026 the cases in which the register is, in fact, set in a
6027 later insn. */
6029 /* ??? We shouldn't have to use FOUNDINSN here.
6030 This dates back to when we used LOG_LINKS to find
6031 the most recent insn which sets the register. */
6033 if (foundinsn
6034 && (scanset
6035 || find_reg_note (scan, REG_DEAD, reg)))
6037 dies = scan;
6038 break;
6042 if (! dies)
6044 /* Either there was a branch, or some insn used REG
6045 other than as a function call address. */
6046 continue;
6049 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6050 on the insn which sets the register, and on each call insn
6051 which uses the register. In final_prescan_insn we look for
6052 the REG_LABEL_OPERAND notes, and output the appropriate label
6053 or pseudo-op. */
6055 label = gen_label_rtx ();
6056 add_reg_note (link, REG_LABEL_OPERAND, label);
6057 add_reg_note (insn, REG_LABEL_OPERAND, label);
6058 if (rescan)
6060 scan = link;
6063 rtx reg2;
6065 scan = NEXT_INSN (scan);
6066 if (scan != insn
6067 && ((CALL_P (scan)
6068 && reg_mentioned_p (reg, scan))
6069 || ((reg2 = sfunc_uses_reg (scan))
6070 && REGNO (reg2) == REGNO (reg))))
6071 add_reg_note (scan, REG_LABEL_OPERAND, label);
6073 while (scan != dies);
6078 if (TARGET_SH2)
6079 fixup_addr_diff_vecs (first);
6081 if (optimize)
6083 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6084 shorten_branches (first);
6087 /* Scan the function looking for move instructions which have to be
6088 changed to pc-relative loads and insert the literal tables. */
6089 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6090 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6092 if (mova_p (insn))
6094 /* ??? basic block reordering can move a switch table dispatch
6095 below the switch table. Check if that has happened.
6096 We only have the addresses available when optimizing; but then,
6097 this check shouldn't be needed when not optimizing. */
6098 if (!untangle_mova (&num_mova, &mova, insn))
6100 insn = mova;
6101 num_mova = 0;
6104 else if (JUMP_TABLE_DATA_P (insn)
6105 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6106 && num_mova
6107 /* ??? loop invariant motion can also move a mova out of a
6108 loop. Since loop does this code motion anyway, maybe we
6109 should wrap UNSPEC_MOVA into a CONST, so that reload can
6110 move it back. */
6111 && ((num_mova > 1
6112 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6113 || (prev_nonnote_insn (insn)
6114 == XEXP (MOVA_LABELREF (mova), 0))))
6116 rtx_insn *scan;
6117 int total;
6119 num_mova--;
6121 /* Some code might have been inserted between the mova and
6122 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6123 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6124 total += get_attr_length (scan);
6126 /* range of mova is 1020, add 4 because pc counts from address of
6127 second instruction after this one, subtract 2 in case pc is 2
6128 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6129 cancels out with alignment effects of the mova itself. */
6130 if (total > 1022)
6132 /* Change the mova into a load, and restart scanning
6133 there. broken_move will then return true for mova. */
6134 fixup_mova (mova);
6135 insn = mova;
6138 if (broken_move (insn)
6139 || (NONJUMP_INSN_P (insn)
6140 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6142 rtx_insn *scan;
6143 /* Scan ahead looking for a barrier to stick the constant table
6144 behind. */
6145 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6146 rtx_insn *last_float_move = NULL;
6147 rtx last_float = 0, *last_float_addr = NULL;
6148 int need_aligned_label = 0;
6150 if (num_mova && ! mova_p (mova))
6152 /* find_barrier had to change the first mova into a
6153 pcload; thus, we have to start with this new pcload. */
6154 insn = mova;
6155 num_mova = 0;
6157 /* Now find all the moves between the points and modify them. */
6158 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6160 if (LABEL_P (scan))
6161 last_float = 0;
6162 if (NONJUMP_INSN_P (scan)
6163 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6164 need_aligned_label = 1;
6165 if (broken_move (scan))
6167 rtx *patp = &PATTERN (scan), pat = *patp;
6168 rtx src, dst;
6169 rtx lab;
6170 rtx newsrc;
6171 machine_mode mode;
6173 if (GET_CODE (pat) == PARALLEL)
6174 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6175 src = SET_SRC (pat);
6176 dst = SET_DEST (pat);
6177 mode = GET_MODE (dst);
6179 if (mode == SImode && satisfies_constraint_I16 (src)
6180 && REGNO (dst) != FPUL_REG)
6182 int offset = 0;
6184 mode = HImode;
6185 while (GET_CODE (dst) == SUBREG)
6187 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6188 GET_MODE (SUBREG_REG (dst)),
6189 SUBREG_BYTE (dst),
6190 GET_MODE (dst));
6191 dst = SUBREG_REG (dst);
6193 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6195 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6197 /* This must be an insn that clobbers r0. */
6198 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6199 XVECLEN (PATTERN (scan), 0)
6200 - 1);
6201 rtx clobber = *clobberp;
6203 gcc_assert (GET_CODE (clobber) == CLOBBER
6204 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6206 if (last_float
6207 && reg_set_between_p (r0_rtx, last_float_move, scan))
6208 last_float = 0;
6209 lab = add_constant (src, mode, last_float);
6210 if (lab)
6211 emit_insn_before (gen_mova (lab), scan);
6212 else
6214 /* There will be a REG_UNUSED note for r0 on
6215 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6216 lest reorg:mark_target_live_regs will not
6217 consider r0 to be used, and we end up with delay
6218 slot insn in front of SCAN that clobbers r0. */
6219 rtx note
6220 = find_regno_note (last_float_move, REG_UNUSED, 0);
6222 /* If we are not optimizing, then there may not be
6223 a note. */
6224 if (note)
6225 PUT_REG_NOTE_KIND (note, REG_INC);
6227 *last_float_addr = r0_inc_rtx;
6229 last_float_move = scan;
6230 last_float = src;
6231 newsrc = gen_const_mem (mode,
6232 (((TARGET_SH4 && ! TARGET_FMOVD)
6233 || REGNO (dst) == FPUL_REG)
6234 ? r0_inc_rtx
6235 : r0_rtx));
6236 last_float_addr = &XEXP (newsrc, 0);
6238 /* Remove the clobber of r0. */
6239 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6240 gen_rtx_SCRATCH (Pmode));
6242 /* This is a mova needing a label. Create it. */
6243 else if (GET_CODE (src) == UNSPEC
6244 && XINT (src, 1) == UNSPEC_MOVA
6245 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6247 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6248 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6249 newsrc = gen_rtx_UNSPEC (SImode,
6250 gen_rtvec (1, newsrc),
6251 UNSPEC_MOVA);
6253 else if (GET_CODE (src) == UNSPEC_VOLATILE
6254 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6256 newsrc = XVECEXP (src, 0, 0);
6257 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6258 INSN_CODE (scan) = -1;
6259 continue;
6261 else
6263 lab = add_constant (src, mode, 0);
6264 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6265 newsrc = gen_const_mem (mode, newsrc);
6267 *patp = gen_rtx_SET (dst, newsrc);
6268 INSN_CODE (scan) = -1;
6271 dump_table (need_aligned_label ? insn : 0, barrier);
6272 insn = barrier;
6275 label_ref_list_d_pool.release ();
6276 for (insn = first; insn; insn = NEXT_INSN (insn))
6277 PUT_MODE (insn, VOIDmode);
6279 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6280 INSN_ADDRESSES_FREE ();
6281 split_branches (first);
6283 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6284 also has an effect on the register that holds the address of the sfunc.
6285 Insert an extra dummy insn in front of each sfunc that pretends to
6286 use this register. */
6287 if (flag_delayed_branch)
6289 for (insn = first; insn; insn = NEXT_INSN (insn))
6291 rtx reg = sfunc_uses_reg (insn);
6293 if (! reg)
6294 continue;
6295 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6298 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6301 /* Return the UID of the insn that follows the specified label. */
6303 get_dest_uid (rtx label, int max_uid)
6305 rtx_insn *dest = next_real_insn (label);
6307 if (! dest)
6308 /* This can happen for an undefined label. */
6309 return 0;
6310 int dest_uid = INSN_UID (dest);
6311 /* If this is a newly created branch redirection blocking instruction,
6312 we cannot index the branch_uid or insn_addresses arrays with its
6313 uid. But then, we won't need to, because the actual destination is
6314 the following branch. */
6315 while (dest_uid >= max_uid)
6317 dest = NEXT_INSN (dest);
6318 dest_uid = INSN_UID (dest);
6320 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6321 return 0;
6322 return dest_uid;
6325 /* Split condbranches that are out of range. Also add clobbers for
6326 scratch registers that are needed in far jumps.
6327 We do this before delay slot scheduling, so that it can take our
6328 newly created instructions into account. It also allows us to
6329 find branches with common targets more easily. */
6330 static void
6331 split_branches (rtx_insn *first)
6333 rtx_insn *insn;
6334 struct far_branch **uid_branch, *far_branch_list = 0;
6335 int max_uid = get_max_uid ();
6336 int ok;
6338 /* Find out which branches are out of range. */
6339 shorten_branches (first);
6341 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6342 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6344 for (insn = first; insn; insn = NEXT_INSN (insn))
6345 if (! INSN_P (insn))
6346 continue;
6347 else if (insn->deleted ())
6349 /* Shorten_branches would split this instruction again,
6350 so transform it into a note. */
6351 SET_INSN_DELETED (insn);
6353 else if (JUMP_P (insn))
6355 enum attr_type type = get_attr_type (insn);
6356 if (type == TYPE_CBRANCH)
6358 rtx_insn *next, *beyond;
6360 if (get_attr_length (insn) > 4)
6362 rtx src = SET_SRC (PATTERN (insn));
6363 rtx olabel = XEXP (XEXP (src, 1), 0);
6364 int addr = INSN_ADDRESSES (INSN_UID (insn));
6365 rtx_insn *label = 0;
6366 int dest_uid = get_dest_uid (olabel, max_uid);
6367 struct far_branch *bp = uid_branch[dest_uid];
6369 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6370 the label if the LABEL_NUSES count drops to zero. There is
6371 always a jump_optimize pass that sets these values, but it
6372 proceeds to delete unreferenced code, and then if not
6373 optimizing, to un-delete the deleted instructions, thus
6374 leaving labels with too low uses counts. */
6375 if (! optimize)
6377 JUMP_LABEL (insn) = olabel;
6378 LABEL_NUSES (olabel)++;
6380 if (! bp)
6382 bp = (struct far_branch *) alloca (sizeof *bp);
6383 uid_branch[dest_uid] = bp;
6384 bp->prev = far_branch_list;
6385 far_branch_list = bp;
6386 bp->far_label = as_a <rtx_insn *> (
6387 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6388 0));
6389 LABEL_NUSES (bp->far_label)++;
6391 else
6393 label = bp->near_label;
6394 if (! label && bp->address - addr >= CONDJUMP_MIN)
6396 rtx_insn *block = bp->insert_place;
6398 if (GET_CODE (PATTERN (block)) == RETURN)
6399 block = PREV_INSN (block);
6400 else
6401 block = gen_block_redirect (block,
6402 bp->address, 2);
6403 label = emit_label_after (gen_label_rtx (),
6404 PREV_INSN (block));
6405 bp->near_label = label;
6407 else if (label && ! NEXT_INSN (label))
6409 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6410 bp->insert_place = insn;
6411 else
6412 gen_far_branch (bp);
6415 if (! label
6416 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6418 bp->near_label = label = gen_label_rtx ();
6419 bp->insert_place = insn;
6420 bp->address = addr;
6422 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6423 gcc_assert (ok);
6425 else
6427 /* get_attr_length (insn) == 2 */
6428 /* Check if we have a pattern where reorg wants to redirect
6429 the branch to a label from an unconditional branch that
6430 is too far away. */
6431 /* We can't use JUMP_LABEL here because it might be undefined
6432 when not optimizing. */
6433 /* A syntax error might cause beyond to be NULL_RTX. */
6434 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6435 beyond = next_active_insn (as_a<rtx_insn *> (temp));
6437 if (beyond
6438 && (JUMP_P (beyond)
6439 || ((beyond = next_active_insn (beyond))
6440 && JUMP_P (beyond)))
6441 && GET_CODE (PATTERN (beyond)) == SET
6442 && recog_memoized (beyond) == CODE_FOR_jump_compact
6443 && ((INSN_ADDRESSES
6444 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6445 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6446 > 252 + 258 + 2))
6447 gen_block_redirect (beyond,
6448 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6451 next = next_active_insn (insn);
6453 if (next
6454 && (JUMP_P (next)
6455 || ((next = next_active_insn (next))
6456 && JUMP_P (next)))
6457 && GET_CODE (PATTERN (next)) == SET
6458 && recog_memoized (next) == CODE_FOR_jump_compact
6459 && ((INSN_ADDRESSES
6460 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6461 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6462 > 252 + 258 + 2))
6463 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6465 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6467 int addr = INSN_ADDRESSES (INSN_UID (insn));
6468 rtx_insn *far_label = 0;
6469 int dest_uid = 0;
6470 struct far_branch *bp;
6472 if (type == TYPE_JUMP)
6474 if (CROSSING_JUMP_P (insn))
6476 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6477 insn);
6478 continue;
6481 far_label = as_a <rtx_insn *> (
6482 XEXP (SET_SRC (PATTERN (insn)), 0));
6483 dest_uid = get_dest_uid (far_label, max_uid);
6484 if (! dest_uid)
6486 /* Parse errors can lead to labels outside
6487 the insn stream. */
6488 if (! NEXT_INSN (far_label))
6489 continue;
6491 if (! optimize)
6493 JUMP_LABEL (insn) = far_label;
6494 LABEL_NUSES (far_label)++;
6496 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6497 far_label = 0;
6500 bp = uid_branch[dest_uid];
6501 if (! bp)
6503 bp = (struct far_branch *) alloca (sizeof *bp);
6504 uid_branch[dest_uid] = bp;
6505 bp->prev = far_branch_list;
6506 far_branch_list = bp;
6507 bp->near_label = 0;
6508 bp->far_label = far_label;
6509 if (far_label)
6510 LABEL_NUSES (far_label)++;
6512 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6513 if (addr - bp->address <= CONDJUMP_MAX)
6514 emit_label_after (bp->near_label, PREV_INSN (insn));
6515 else
6517 gen_far_branch (bp);
6518 bp->near_label = 0;
6520 else
6521 bp->near_label = 0;
6522 bp->address = addr;
6523 bp->insert_place = insn;
6524 if (! far_label)
6525 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6526 else
6527 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6530 /* Generate all pending far branches,
6531 and free our references to the far labels. */
6532 while (far_branch_list)
6534 if (far_branch_list->near_label
6535 && ! NEXT_INSN (far_branch_list->near_label))
6536 gen_far_branch (far_branch_list);
6537 if (optimize
6538 && far_branch_list->far_label
6539 && ! --LABEL_NUSES (far_branch_list->far_label))
6540 delete_insn (far_branch_list->far_label);
6541 far_branch_list = far_branch_list->prev;
6544 /* Instruction length information is no longer valid due to the new
6545 instructions that have been generated. */
6546 init_insn_lengths ();
6549 /* Dump out instruction addresses, which is useful for debugging the
6550 constant pool table stuff.
6552 If relaxing, output the label and pseudo-ops used to link together
6553 calls and the instruction which set the registers.
6555 ??? The addresses printed by this routine for insns are nonsense for
6556 insns which are inside of a sequence where none of the inner insns have
6557 variable length. This is because the second pass of shorten_branches
6558 does not bother to update them. */
6559 void
6560 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6561 int noperands ATTRIBUTE_UNUSED)
6563 if (TARGET_DUMPISIZE)
6564 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6566 if (TARGET_RELAX)
6568 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6570 rtx pattern = PATTERN (insn);
6571 if (GET_CODE (pattern) == PARALLEL)
6572 pattern = XVECEXP (pattern, 0, 0);
6573 switch (GET_CODE (pattern))
6575 case SET:
6576 if (GET_CODE (SET_SRC (pattern)) != CALL
6577 && get_attr_type (insn) != TYPE_SFUNC)
6579 targetm.asm_out.internal_label
6580 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6581 break;
6583 /* FALLTHROUGH */
6584 case CALL:
6585 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6586 CODE_LABEL_NUMBER (XEXP (note, 0)));
6587 break;
6589 default:
6590 gcc_unreachable ();
6596 /* Dump out any constants accumulated in the final pass. These will
6597 only be labels. */
6598 const char *
6599 output_jump_label_table (void)
6601 if (pool_size)
6603 fprintf (asm_out_file, "\t.align 2\n");
6604 for (int i = 0; i < pool_size; i++)
6606 pool_node *p = &pool_vector[i];
6608 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6609 CODE_LABEL_NUMBER (p->label));
6610 output_asm_insn (".long %O0", &p->value);
6612 pool_size = 0;
6615 return "";
6618 /* A full frame looks like:
6620 arg-5
6621 arg-4
6622 [ if current_function_anonymous_args
6623 arg-3
6624 arg-2
6625 arg-1
6626 arg-0 ]
6627 saved-fp
6628 saved-r10
6629 saved-r11
6630 saved-r12
6631 saved-pr
6632 local-n
6634 local-1
6635 local-0 <- fp points here.
6637 Number of bytes pushed for anonymous args, used to pass information
6638 between expand_prologue and expand_epilogue.
6640 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6641 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6642 for an epilogue and a negative value means that it's for a sibcall
6643 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6644 all the registers that are about to be restored, and hence dead. */
6645 static void
6646 output_stack_adjust (int size, rtx reg, int epilogue_p,
6647 HARD_REG_SET *live_regs_mask, bool frame_p)
6649 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6650 if (size)
6652 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6654 /* This test is bogus, as output_stack_adjust is used to re-align the
6655 stack. */
6656 #if 0
6657 gcc_assert (!(size % align));
6658 #endif
6660 if (CONST_OK_FOR_ADD (size))
6661 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6662 /* Try to do it with two partial adjustments; however, we must make
6663 sure that the stack is properly aligned at all times, in case
6664 an interrupt occurs between the two partial adjustments. */
6665 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6666 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6668 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6669 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6671 else
6673 rtx const_reg;
6674 rtx insn;
6675 int temp = epilogue_p ? 7 : 1;
6676 int i;
6678 /* If TEMP is invalid, we could temporarily save a general
6679 register to MACL. However, there is currently no need
6680 to handle this case, so just die when we see it. */
6681 if (epilogue_p < 0
6682 || current_function_interrupt
6683 || ! call_really_used_regs[temp] || fixed_regs[temp])
6684 temp = -1;
6685 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6687 HARD_REG_SET temps;
6688 COPY_HARD_REG_SET (temps, call_used_reg_set);
6689 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6690 if (epilogue_p > 0)
6692 int nreg = 0;
6693 if (crtl->return_rtx)
6695 machine_mode mode;
6696 mode = GET_MODE (crtl->return_rtx);
6697 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6698 nreg = hard_regno_nregs (FIRST_RET_REG, mode);
6700 for (i = 0; i < nreg; i++)
6701 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6702 if (crtl->calls_eh_return)
6704 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6705 for (i = 0; i <= 3; i++)
6706 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6709 if (epilogue_p <= 0)
6711 for (i = FIRST_PARM_REG;
6712 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6713 CLEAR_HARD_REG_BIT (temps, i);
6714 if (cfun->static_chain_decl != NULL)
6715 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6717 temp = scavenge_reg (&temps);
6719 if (temp < 0 && live_regs_mask)
6721 HARD_REG_SET temps;
6723 COPY_HARD_REG_SET (temps, *live_regs_mask);
6724 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6725 temp = scavenge_reg (&temps);
6727 if (temp < 0)
6729 rtx adj_reg, tmp_reg, mem;
6731 /* If we reached here, the most likely case is the (sibcall)
6732 epilogue. Put a special push/pop sequence for such case as
6733 the last resort. This looks lengthy but would not be problem
6734 because it seems to be very rare. */
6735 gcc_assert (epilogue_p);
6737 /* ??? There is still the slight possibility that r4 or
6738 r5 have been reserved as fixed registers or assigned
6739 as global registers, and they change during an
6740 interrupt. There are possible ways to handle this:
6742 - If we are adjusting the frame pointer (r14), we can do
6743 with a single temp register and an ordinary push / pop
6744 on the stack.
6745 - Grab any call-used or call-saved registers (i.e. not
6746 fixed or globals) for the temps we need. We might
6747 also grab r14 if we are adjusting the stack pointer.
6748 If we can't find enough available registers, issue
6749 a diagnostic and die - the user must have reserved
6750 way too many registers.
6751 But since all this is rather unlikely to happen and
6752 would require extra testing, we just die if r4 / r5
6753 are not available. */
6754 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6755 && !global_regs[4] && !global_regs[5]);
6757 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6758 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6759 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6760 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6761 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6762 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6763 emit_move_insn (mem, tmp_reg);
6764 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6765 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6766 emit_move_insn (mem, tmp_reg);
6767 emit_move_insn (reg, adj_reg);
6768 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6769 emit_move_insn (adj_reg, mem);
6770 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6771 emit_move_insn (tmp_reg, mem);
6772 /* Tell flow the insns that pop r4/r5 aren't dead. */
6773 emit_use (tmp_reg);
6774 emit_use (adj_reg);
6775 return;
6777 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6779 /* If SIZE is negative, subtract the positive value.
6780 This sometimes allows a constant pool entry to be shared
6781 between prologue and epilogue code. */
6782 if (size < 0)
6784 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6785 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6787 else
6789 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6790 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6792 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6793 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6794 GEN_INT (size))));
6799 /* Emit the specified insn and mark it as frame related. */
6800 static rtx_insn *
6801 emit_frame_insn (rtx x)
6803 rtx_insn *insn = emit_insn (x);
6804 RTX_FRAME_RELATED_P (insn) = 1;
6805 return insn;
6808 /* Output RTL to push register RN onto the stack. */
6809 static rtx
6810 push (int rn)
6812 rtx x;
6813 if (rn == FPUL_REG)
6814 x = gen_push_fpul ();
6815 else if (rn == FPSCR_REG)
6816 x = gen_push_fpscr ();
6817 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6818 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6820 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6821 return NULL_RTX;
6822 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6824 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6825 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6826 else
6827 x = gen_push (gen_rtx_REG (SImode, rn));
6829 x = emit_frame_insn (x);
6830 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6831 return x;
6834 /* Output RTL to pop register RN from the stack. */
6835 static void
6836 pop (int rn)
6838 rtx x, sp_reg, reg;
6839 if (rn == FPUL_REG)
6840 x = gen_pop_fpul ();
6841 else if (rn == FPSCR_REG)
6842 x = gen_pop_fpscr ();
6843 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6844 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6846 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6847 return;
6848 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6850 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6851 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6852 else
6853 x = gen_pop (gen_rtx_REG (SImode, rn));
6855 x = emit_insn (x);
6857 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6858 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6859 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6860 : SET_DEST (PATTERN (x)));
6861 add_reg_note (x, REG_CFA_RESTORE, reg);
6862 add_reg_note (x, REG_CFA_ADJUST_CFA,
6863 gen_rtx_SET (sp_reg,
6864 plus_constant (SImode, sp_reg,
6865 GET_MODE_SIZE (GET_MODE (reg)))));
6866 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6867 RTX_FRAME_RELATED_P (x) = 1;
6870 /* Generate code to push the regs specified in the mask. */
6871 static void
6872 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6874 bool skip_fpscr = false;
6876 /* Push PR last; this gives better latencies after the prologue, and
6877 candidates for the return delay slot when there are no general
6878 registers pushed. */
6879 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6880 i < FIRST_PSEUDO_REGISTER; i++)
6882 /* If this is an interrupt handler, and the SZ bit varies,
6883 and we have to push any floating point register, we need
6884 to switch to the correct precision first. */
6885 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6886 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6888 HARD_REG_SET unsaved;
6890 push (FPSCR_REG);
6891 COMPL_HARD_REG_SET (unsaved, *mask);
6892 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6893 skip_fpscr = true;
6895 if (i != PR_REG
6896 && (i != FPSCR_REG || ! skip_fpscr)
6897 && TEST_HARD_REG_BIT (*mask, i))
6899 /* If the ISR has RESBANK attribute assigned, don't push any of
6900 the following registers - R0-R14, MACH, MACL and GBR. */
6901 if (! (sh_cfun_resbank_handler_p ()
6902 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6903 || i == MACH_REG
6904 || i == MACL_REG
6905 || i == GBR_REG)))
6906 push (i);
6910 /* Push banked registers last to improve delay slot opportunities. */
6911 if (interrupt_handler)
6913 bool use_movml = false;
6915 if (TARGET_SH2A)
6917 unsigned int count = 0;
6919 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6920 if (TEST_HARD_REG_BIT (*mask, i))
6921 count++;
6922 else
6923 break;
6925 /* Use movml when all banked registers are pushed. */
6926 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6927 use_movml = true;
6930 if (sh_cfun_resbank_handler_p ())
6931 ; /* Do nothing. */
6932 else if (use_movml)
6934 rtx x, mem, reg, set;
6935 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6937 /* We must avoid scheduling multiple store insn with another
6938 insns. */
6939 emit_insn (gen_blockage ());
6940 x = gen_movml_push_banked (sp_reg);
6941 x = emit_frame_insn (x);
6942 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6944 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6945 reg = gen_rtx_REG (SImode, i);
6946 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6949 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6950 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6951 emit_insn (gen_blockage ());
6953 else
6954 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6955 if (TEST_HARD_REG_BIT (*mask, i))
6956 push (i);
6959 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6960 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6961 push (PR_REG);
6964 /* Work out the registers which need to be saved, both as a mask and a
6965 count of saved words. Return the count.
6967 If doing a pragma interrupt function, then push all regs used by the
6968 function, and if we call another function (we can tell by looking at PR),
6969 make sure that all the regs it clobbers are safe too. */
6970 static int
6971 calc_live_regs (HARD_REG_SET *live_regs_mask)
6973 unsigned int reg;
6974 tree attrs;
6975 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6976 bool nosave_low_regs;
6978 attrs = DECL_ATTRIBUTES (current_function_decl);
6979 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6980 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6981 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6982 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6984 CLEAR_HARD_REG_SET (*live_regs_mask);
6985 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
6986 && df_regs_ever_live_p (FPSCR_REG))
6987 target_flags &= ~MASK_FPU_SINGLE;
6988 /* If we can save a lot of saves by switching to double mode, do that. */
6989 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
6990 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6991 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6992 && (! call_really_used_regs[reg]
6993 || interrupt_handler)
6994 && ++count > 2)
6996 target_flags &= ~MASK_FPU_SINGLE;
6997 break;
7001 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7002 bool pr_live = (pr_initial
7003 ? (!REG_P (pr_initial)
7004 || REGNO (pr_initial) != (PR_REG))
7005 : df_regs_ever_live_p (PR_REG));
7006 /* For Shcompact, if not optimizing, we end up with a memory reference
7007 using the return address pointer for __builtin_return_address even
7008 though there is no actual need to put the PR register on the stack. */
7009 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7011 /* Force PR to be live if the prologue has to call the SHmedia
7012 argument decoder or register saver. */
7013 bool has_call = pr_live;
7015 int count;
7016 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7018 if (reg == PR_REG
7019 ? pr_live
7020 : interrupt_handler
7021 ? (/* Need to save all the regs ever live. */
7022 (df_regs_ever_live_p (reg)
7023 || (call_really_used_regs[reg]
7024 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7025 || reg == PIC_OFFSET_TABLE_REGNUM)
7026 && has_call))
7027 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7028 && reg != RETURN_ADDRESS_POINTER_REGNUM
7029 && reg != T_REG && reg != GBR_REG
7030 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7031 /* Push fpscr only on targets which have FPU */
7032 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7033 : (/* Only push those regs which are used and need to be saved. */
7034 (false)
7035 || (df_regs_ever_live_p (reg)
7036 && ((!call_really_used_regs[reg]
7037 && !(reg != PIC_OFFSET_TABLE_REGNUM
7038 && fixed_regs[reg] && call_used_regs[reg]))
7039 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7040 || (crtl->calls_eh_return
7041 && (reg == EH_RETURN_DATA_REGNO (0)
7042 || reg == EH_RETURN_DATA_REGNO (1)
7043 || reg == EH_RETURN_DATA_REGNO (2)
7044 || reg == EH_RETURN_DATA_REGNO (3)))
7045 || ((reg == MACL_REG || reg == MACH_REG)
7046 && df_regs_ever_live_p (reg)
7047 && sh_cfun_attr_renesas_p ())
7050 SET_HARD_REG_BIT (*live_regs_mask, reg);
7051 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7053 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7054 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7056 if (FP_REGISTER_P (reg))
7058 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7060 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7061 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7064 else if (XD_REGISTER_P (reg))
7066 /* Must switch to double mode to access these registers. */
7067 target_flags &= ~MASK_FPU_SINGLE;
7071 if (nosave_low_regs && reg == R8_REG)
7072 break;
7075 return count;
7078 /* Code to generate prologue and epilogue sequences */
7080 /* PUSHED is the number of bytes that are being pushed on the
7081 stack for register saves. Return the frame size, padded
7082 appropriately so that the stack stays properly aligned. */
7083 static HOST_WIDE_INT
7084 rounded_frame_size (int pushed)
7086 HOST_WIDE_INT size = get_frame_size ();
7087 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7089 if (ACCUMULATE_OUTGOING_ARGS)
7090 size += crtl->outgoing_args_size;
7092 return ((size + pushed + align - 1) & -align) - pushed;
7095 /* Expand code for the function prologue. */
7096 void
7097 sh_expand_prologue (void)
7099 int save_flags = target_flags;
7100 tree sp_switch_attr
7101 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7103 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7105 /* We have pretend args if we had an object sent partially in registers
7106 and partially on the stack, e.g. a large structure. */
7107 int pretend_args = crtl->args.pretend_args_size;
7108 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7109 && (NPARM_REGS(SImode)
7110 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7111 pretend_args = 0;
7113 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7114 int stack_usage = pretend_args;
7116 /* Emit the code for SETUP_VARARGS. */
7117 if (cfun->stdarg)
7119 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7121 /* Push arg regs as if they'd been provided by caller in stack. */
7122 for (int i = 0; i < NPARM_REGS(SImode); i++)
7124 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7126 if (i >= (NPARM_REGS(SImode)
7127 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7129 break;
7130 push (rn);
7131 stack_usage += GET_MODE_SIZE (SImode);
7136 /* If we're supposed to switch stacks at function entry, do so now. */
7137 if (sp_switch_attr)
7139 rtx lab, newsrc;
7140 /* The argument specifies a variable holding the address of the
7141 stack the interrupt function should switch to/from at entry/exit. */
7142 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7143 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7144 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7146 lab = add_constant (sp_switch, SImode, 0);
7147 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7149 emit_insn (gen_sp_switch_1 (newsrc));
7152 HARD_REG_SET live_regs_mask;
7153 int d = calc_live_regs (&live_regs_mask);
7154 /* ??? Maybe we could save some switching if we can move a mode switch
7155 that already happens to be at the function start into the prologue. */
7156 if (target_flags != save_flags && ! current_function_interrupt)
7157 emit_insn (gen_toggle_sz ());
7159 push_regs (&live_regs_mask, current_function_interrupt);
7160 stack_usage += d;
7162 if (flag_pic && !TARGET_FDPIC
7163 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7164 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7166 if (target_flags != save_flags && ! current_function_interrupt)
7167 emit_insn (gen_toggle_sz ());
7169 target_flags = save_flags;
7171 output_stack_adjust (-rounded_frame_size (d),
7172 stack_pointer_rtx, 0, NULL, true);
7173 stack_usage += rounded_frame_size (d);
7175 if (frame_pointer_needed)
7176 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7178 /* If we are profiling, make sure no instructions are scheduled before
7179 the call to mcount. Similarly if some call instructions are swapped
7180 before frame related insns, it'll confuse the unwinder because
7181 currently SH has no unwind info for function epilogues. */
7182 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7183 emit_insn (gen_blockage ());
7185 if (flag_stack_usage_info)
7186 current_function_static_stack_size = stack_usage;
7189 /* Expand code for the function epilogue. */
7190 void
7191 sh_expand_epilogue (bool sibcall_p)
7193 int save_flags = target_flags;
7194 bool fpscr_deferred = false;
7195 int e = sibcall_p ? -1 : 1;
7197 HARD_REG_SET live_regs_mask;
7198 int d = calc_live_regs (&live_regs_mask);
7200 int save_size = d;
7201 int frame_size = rounded_frame_size (d);
7203 if (frame_pointer_needed)
7205 /* We must avoid scheduling the epilogue with previous basic blocks.
7206 See PR/18032 and PR/40313. */
7207 emit_insn (gen_blockage ());
7208 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7209 &live_regs_mask, true);
7211 /* We must avoid moving the stack pointer adjustment past code
7212 which reads from the local frame, else an interrupt could
7213 occur after the SP adjustment and clobber data in the local
7214 frame. */
7215 emit_insn (gen_blockage ());
7216 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7218 else if (frame_size)
7220 /* We must avoid moving the stack pointer adjustment past code
7221 which reads from the local frame, else an interrupt could
7222 occur after the SP adjustment and clobber data in the local
7223 frame. */
7224 emit_insn (gen_blockage ());
7225 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7226 &live_regs_mask, true);
7229 /* Pop all the registers. */
7231 if (target_flags != save_flags && ! current_function_interrupt)
7232 emit_insn (gen_toggle_sz ());
7235 int last_reg;
7237 save_size = 0;
7238 /* For an ISR with RESBANK attribute assigned, don't pop PR
7239 register. */
7240 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7241 && !sh_cfun_resbank_handler_p ())
7243 if (!frame_pointer_needed)
7244 emit_insn (gen_blockage ());
7245 pop (PR_REG);
7248 /* Banked registers are popped first to avoid being scheduled in the
7249 delay slot. RTE switches banks before the ds instruction. */
7250 if (current_function_interrupt)
7252 bool use_movml = false;
7254 if (TARGET_SH2A)
7256 unsigned int count = 0;
7258 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7259 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7260 count++;
7261 else
7262 break;
7264 /* Use movml when all banked register are poped. */
7265 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7266 use_movml = true;
7269 if (sh_cfun_resbank_handler_p ())
7270 ; /* Do nothing. */
7271 else if (use_movml)
7273 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7275 /* We must avoid scheduling multiple load insn with another
7276 insns. */
7277 emit_insn (gen_blockage ());
7278 emit_insn (gen_movml_pop_banked (sp_reg));
7279 emit_insn (gen_blockage ());
7281 else
7282 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7283 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7284 pop (i);
7286 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7288 else
7289 last_reg = FIRST_PSEUDO_REGISTER;
7291 for (int i = 0; i < last_reg; i++)
7293 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7295 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7296 && hard_reg_set_intersect_p (live_regs_mask,
7297 reg_class_contents[DF_REGS]))
7298 fpscr_deferred = true;
7299 /* For an ISR with RESBANK attribute assigned, don't pop
7300 following registers, R0-R14, MACH, MACL and GBR. */
7301 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7302 && ! (sh_cfun_resbank_handler_p ()
7303 && ((j >= FIRST_GENERAL_REG
7304 && j < LAST_GENERAL_REG)
7305 || j == MACH_REG
7306 || j == MACL_REG
7307 || j == GBR_REG)))
7308 pop (j);
7310 if (j == FIRST_FP_REG && fpscr_deferred)
7311 pop (FPSCR_REG);
7314 if (target_flags != save_flags && ! current_function_interrupt)
7315 emit_insn (gen_toggle_sz ());
7316 target_flags = save_flags;
7318 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7319 stack_pointer_rtx, e, NULL, true);
7321 if (crtl->calls_eh_return)
7322 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7323 EH_RETURN_STACKADJ_RTX));
7325 /* Switch back to the normal stack if necessary. */
7326 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7327 emit_insn (gen_sp_switch_2 ());
7329 /* Tell flow the insn that pops PR isn't dead. */
7330 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7331 emit_use (gen_rtx_REG (SImode, PR_REG));
7334 /* Emit code to change the current function's return address to RA.
7335 TEMP is available as a scratch register, if needed. */
7336 void
7337 sh_set_return_address (rtx ra, rtx tmp)
7339 HARD_REG_SET live_regs_mask;
7340 int d = calc_live_regs (&live_regs_mask);
7342 /* If pr_reg isn't life, we can set it directly. */
7343 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7345 rtx rr = gen_rtx_REG (SImode, PR_REG);
7346 emit_insn (GEN_MOV (rr, ra));
7347 /* Tell flow the register for return isn't dead. */
7348 emit_use (rr);
7349 return;
7352 int pr_offset = rounded_frame_size (d);
7354 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7356 if (frame_pointer_needed)
7357 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7358 else
7359 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7361 tmp = gen_frame_mem (Pmode, tmp);
7362 emit_insn (GEN_MOV (tmp, ra));
7363 /* Tell this store isn't dead. */
7364 emit_use (tmp);
7367 /* Clear variables at function end. */
7368 static void
7369 sh_output_function_epilogue (FILE *)
7373 static rtx
7374 sh_builtin_saveregs (void)
7376 /* First unnamed integer register. */
7377 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7378 /* Number of integer registers we need to save. */
7379 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7380 /* First unnamed SFmode float reg */
7381 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7382 /* Number of SFmode float regs to save. */
7383 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7384 rtx regbuf, fpregs;
7385 int bufsize, regno;
7386 alias_set_type alias_set;
7388 if (!TARGET_FPU_ANY)
7390 error ("__builtin_saveregs not supported by this subtarget");
7391 return const0_rtx;
7394 /* Allocate block of memory for the regs. */
7395 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7396 Or can assign_stack_local accept a 0 SIZE argument? */
7397 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7399 if (n_floatregs & 1)
7401 rtx addr;
7403 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7404 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7405 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7406 regbuf = change_address (regbuf, BLKmode, addr);
7408 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7410 rtx addr, mask;
7412 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7413 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7414 XEXP (regbuf, 0), 4));
7415 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7416 emit_insn (gen_andsi3 (addr, addr, mask));
7417 regbuf = change_address (regbuf, BLKmode, addr);
7419 else
7420 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7421 alias_set = get_varargs_alias_set ();
7422 set_mem_alias_set (regbuf, alias_set);
7424 /* Save int args.
7425 This is optimized to only save the regs that are necessary. Explicitly
7426 named args need not be saved. */
7427 if (n_intregs > 0)
7428 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7429 adjust_address (regbuf, BLKmode,
7430 n_floatregs * UNITS_PER_WORD),
7431 n_intregs);
7433 /* Save float args.
7434 This is optimized to only save the regs that are necessary. Explicitly
7435 named args need not be saved.
7436 We explicitly build a pointer to the buffer because it halves the insn
7437 count when not optimizing (otherwise the pointer is built for each reg
7438 saved).
7439 We emit the moves in reverse order so that we can use predecrement. */
7441 fpregs = copy_to_mode_reg (Pmode,
7442 plus_constant (Pmode, XEXP (regbuf, 0),
7443 n_floatregs * UNITS_PER_WORD));
7444 if (TARGET_FPU_DOUBLE)
7446 rtx mem;
7447 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7449 emit_insn (gen_addsi3 (fpregs, fpregs,
7450 GEN_INT (-2 * UNITS_PER_WORD)));
7451 mem = change_address (regbuf, DFmode, fpregs);
7452 emit_move_insn (mem,
7453 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7455 regno = first_floatreg;
7456 if (regno & 1)
7458 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7459 mem = change_address (regbuf, SFmode, fpregs);
7460 emit_move_insn (mem,
7461 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7462 + regno - SH_REG_MSW_OFFSET));
7465 else
7466 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7468 rtx mem;
7470 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7471 mem = change_address (regbuf, SFmode, fpregs);
7472 emit_move_insn (mem,
7473 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7476 /* Return the address of the regbuf. */
7477 return XEXP (regbuf, 0);
7480 /* Define the `__builtin_va_list' type for the ABI. */
7481 static tree
7482 sh_build_builtin_va_list (void)
7484 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7485 tree record, type_decl;
7487 if ((! TARGET_SH2E && ! TARGET_SH4)
7488 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7489 return ptr_type_node;
7491 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7492 type_decl = build_decl (BUILTINS_LOCATION,
7493 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7495 f_next_o = build_decl (BUILTINS_LOCATION,
7496 FIELD_DECL, get_identifier ("__va_next_o"),
7497 ptr_type_node);
7498 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7499 FIELD_DECL,
7500 get_identifier ("__va_next_o_limit"),
7501 ptr_type_node);
7502 f_next_fp = build_decl (BUILTINS_LOCATION,
7503 FIELD_DECL, get_identifier ("__va_next_fp"),
7504 ptr_type_node);
7505 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7506 FIELD_DECL,
7507 get_identifier ("__va_next_fp_limit"),
7508 ptr_type_node);
7509 f_next_stack = build_decl (BUILTINS_LOCATION,
7510 FIELD_DECL, get_identifier ("__va_next_stack"),
7511 ptr_type_node);
7513 DECL_FIELD_CONTEXT (f_next_o) = record;
7514 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7515 DECL_FIELD_CONTEXT (f_next_fp) = record;
7516 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7517 DECL_FIELD_CONTEXT (f_next_stack) = record;
7519 TYPE_STUB_DECL (record) = type_decl;
7520 TYPE_NAME (record) = type_decl;
7521 TYPE_FIELDS (record) = f_next_o;
7522 DECL_CHAIN (f_next_o) = f_next_o_limit;
7523 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7524 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7525 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7527 layout_type (record);
7529 return record;
7532 /* Implement `va_start' for varargs and stdarg. */
7533 static void
7534 sh_va_start (tree valist, rtx nextarg)
7536 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7537 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7538 tree t, u;
7539 int nfp, nint;
7541 if ((! TARGET_SH2E && ! TARGET_SH4)
7542 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7544 std_expand_builtin_va_start (valist, nextarg);
7545 return;
7548 f_next_o = TYPE_FIELDS (va_list_type_node);
7549 f_next_o_limit = DECL_CHAIN (f_next_o);
7550 f_next_fp = DECL_CHAIN (f_next_o_limit);
7551 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7552 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7554 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7555 NULL_TREE);
7556 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7557 valist, f_next_o_limit, NULL_TREE);
7558 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7559 NULL_TREE);
7560 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7561 valist, f_next_fp_limit, NULL_TREE);
7562 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7563 valist, f_next_stack, NULL_TREE);
7565 /* Call __builtin_saveregs. */
7566 u = make_tree (sizetype, expand_builtin_saveregs ());
7567 u = fold_convert (ptr_type_node, u);
7568 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7569 TREE_SIDE_EFFECTS (t) = 1;
7570 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7572 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7573 if (nfp < 8)
7574 nfp = 8 - nfp;
7575 else
7576 nfp = 0;
7577 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7578 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7579 TREE_SIDE_EFFECTS (t) = 1;
7580 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7582 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7583 TREE_SIDE_EFFECTS (t) = 1;
7584 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7586 nint = crtl->args.info.arg_count[SH_ARG_INT];
7587 if (nint < 4)
7588 nint = 4 - nint;
7589 else
7590 nint = 0;
7591 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7592 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7593 TREE_SIDE_EFFECTS (t) = 1;
7594 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7596 u = make_tree (ptr_type_node, nextarg);
7597 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7598 TREE_SIDE_EFFECTS (t) = 1;
7599 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7602 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7603 member, return it. */
7604 static tree
7605 find_sole_member (tree type)
7607 tree field, member = NULL_TREE;
7609 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7611 if (TREE_CODE (field) != FIELD_DECL)
7612 continue;
7613 if (!DECL_SIZE (field))
7614 return NULL_TREE;
7615 if (integer_zerop (DECL_SIZE (field)))
7616 continue;
7617 if (member)
7618 return NULL_TREE;
7619 member = field;
7621 return member;
7624 /* Implement `va_arg'. */
7625 static tree
7626 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7627 gimple_seq *post_p ATTRIBUTE_UNUSED)
7629 tree tmp;
7630 tree addr, lab_over = NULL, result = NULL;
7631 tree eff_type;
7633 const bool pass_by_ref =
7634 !VOID_TYPE_P (type)
7635 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7637 if (pass_by_ref)
7638 type = build_pointer_type (type);
7640 HOST_WIDE_INT size = int_size_in_bytes (type);
7641 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7642 tree pptr_type_node = build_pointer_type (ptr_type_node);
7644 if ((TARGET_SH2E || TARGET_SH4)
7645 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7647 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7648 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7649 tree lab_false;
7650 tree member;
7652 f_next_o = TYPE_FIELDS (va_list_type_node);
7653 f_next_o_limit = DECL_CHAIN (f_next_o);
7654 f_next_fp = DECL_CHAIN (f_next_o_limit);
7655 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7656 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7658 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7659 NULL_TREE);
7660 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7661 valist, f_next_o_limit, NULL_TREE);
7662 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7663 valist, f_next_fp, NULL_TREE);
7664 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7665 valist, f_next_fp_limit, NULL_TREE);
7666 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7667 valist, f_next_stack, NULL_TREE);
7669 /* Structures with a single member with a distinct mode are passed
7670 like their member. This is relevant if the latter has a REAL_TYPE
7671 or COMPLEX_TYPE type. */
7672 eff_type = type;
7673 while (TREE_CODE (eff_type) == RECORD_TYPE
7674 && (member = find_sole_member (eff_type))
7675 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7676 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7677 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7679 tree field_type = TREE_TYPE (member);
7681 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7682 eff_type = field_type;
7683 else
7685 gcc_assert ((TYPE_ALIGN (eff_type)
7686 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7687 || (TYPE_ALIGN (eff_type)
7688 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7689 break;
7693 bool pass_as_float;
7694 if (TARGET_FPU_DOUBLE)
7696 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7697 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7698 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7699 && size <= 16));
7701 else
7703 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7706 addr = create_tmp_var (pptr_type_node);
7707 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7708 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7710 valist = build_simple_mem_ref (addr);
7712 if (pass_as_float)
7714 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7715 tree cmp;
7716 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7718 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7719 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7721 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7722 tmp = next_fp_limit;
7723 if (size > 4 && !is_double)
7724 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7725 tmp = build2 (GE_EXPR, boolean_type_node,
7726 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7727 cmp = build3 (COND_EXPR, void_type_node, tmp,
7728 build1 (GOTO_EXPR, void_type_node,
7729 unshare_expr (lab_false)), NULL_TREE);
7730 if (!is_double)
7731 gimplify_and_add (cmp, pre_p);
7733 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7734 || (is_double || size == 16))
7736 tmp = fold_convert (sizetype, next_fp_tmp);
7737 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7738 size_int (UNITS_PER_WORD));
7739 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7740 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7742 if (is_double)
7743 gimplify_and_add (cmp, pre_p);
7745 #ifdef FUNCTION_ARG_SCmode_WART
7746 if (TYPE_MODE (eff_type) == SCmode
7747 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7749 tree subtype = TREE_TYPE (eff_type);
7750 tree real, imag;
7752 imag
7753 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7754 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7756 real
7757 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7758 real = get_initialized_tmp_var (real, pre_p, NULL);
7760 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7761 if (type != eff_type)
7762 result = build1 (VIEW_CONVERT_EXPR, type, result);
7763 result = get_initialized_tmp_var (result, pre_p, NULL);
7765 #endif /* FUNCTION_ARG_SCmode_WART */
7767 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7768 gimplify_and_add (tmp, pre_p);
7770 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7771 gimplify_and_add (tmp, pre_p);
7773 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7774 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7775 gimplify_assign (unshare_expr (next_fp_tmp),
7776 unshare_expr (valist), pre_p);
7778 gimplify_assign (unshare_expr (valist),
7779 unshare_expr (next_fp_tmp), post_p);
7780 valist = next_fp_tmp;
7782 else
7784 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7785 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7786 unshare_expr (next_o_limit));
7787 tmp = build3 (COND_EXPR, void_type_node, tmp,
7788 build1 (GOTO_EXPR, void_type_node,
7789 unshare_expr (lab_false)),
7790 NULL_TREE);
7791 gimplify_and_add (tmp, pre_p);
7793 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7794 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7796 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7797 gimplify_and_add (tmp, pre_p);
7799 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7800 gimplify_and_add (tmp, pre_p);
7802 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7803 gimplify_assign (unshare_expr (next_o),
7804 unshare_expr (next_o_limit), pre_p);
7806 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7807 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7810 if (!result)
7812 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7813 gimplify_and_add (tmp, pre_p);
7817 /* ??? In va-sh.h, there had been code to make values larger than
7818 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7820 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7821 if (result)
7823 gimplify_assign (result, tmp, pre_p);
7824 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7825 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7826 gimplify_and_add (tmp, pre_p);
7828 else
7829 result = tmp;
7831 if (pass_by_ref)
7832 result = build_va_arg_indirect_ref (result);
7834 return result;
7837 /* 64 bit floating points memory transfers are paired single precision loads
7838 or store. So DWARF information needs fixing in little endian (unless
7839 PR=SZ=1 in FPSCR). */
7841 sh_dwarf_register_span (rtx reg)
7843 unsigned regno = REGNO (reg);
7845 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7846 return NULL_RTX;
7848 return
7849 gen_rtx_PARALLEL (VOIDmode,
7850 gen_rtvec (2,
7851 gen_rtx_REG (SFmode, regno + 1),
7852 gen_rtx_REG (SFmode, regno)));
7855 static machine_mode
7856 sh_promote_function_mode (const_tree type, machine_mode mode,
7857 int *punsignedp, const_tree funtype,
7858 int for_return)
7860 if (sh_promote_prototypes (funtype))
7861 return promote_mode (type, mode, punsignedp);
7862 else
7863 return default_promote_function_mode (type, mode, punsignedp, funtype,
7864 for_return);
7867 static bool
7868 sh_promote_prototypes (const_tree type)
7870 if (TARGET_HITACHI)
7871 return false;
7872 if (! type)
7873 return true;
7874 return ! sh_attr_renesas_p (type);
7877 static bool
7878 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7879 const_tree type, bool named ATTRIBUTE_UNUSED)
7881 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7883 if (targetm.calls.must_pass_in_stack (mode, type))
7884 return true;
7886 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7887 wants to know about pass-by-reference semantics for incoming
7888 arguments. */
7889 if (! cum)
7890 return false;
7892 return false;
7895 static bool
7896 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7897 const_tree type, bool named ATTRIBUTE_UNUSED)
7899 /* ??? How can it possibly be correct to return true only on the
7900 caller side of the equation? Is there someplace else in the
7901 sh backend that's magically producing the copies? */
7902 return (get_cumulative_args (cum)->outgoing
7903 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7904 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7907 static sh_arg_class
7908 get_sh_arg_class (machine_mode mode)
7910 if (TARGET_FPU_ANY && mode == SFmode)
7911 return SH_ARG_FLOAT;
7913 if (TARGET_FPU_DOUBLE
7914 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7915 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7916 return SH_ARG_FLOAT;
7918 return SH_ARG_INT;
7921 /* Round a register number up to a proper boundary for an arg of mode
7922 MODE.
7923 The SH doesn't care about double alignment, so we only
7924 round doubles to even regs when asked to explicitly. */
7925 static int
7926 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7928 /* FIXME: This used to be a macro and has been copy pasted into this
7929 function as is. Make this more readable. */
7930 return
7931 (((TARGET_ALIGN_DOUBLE
7932 || (TARGET_FPU_DOUBLE
7933 && (mode == DFmode || mode == DCmode)
7934 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7935 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7936 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7937 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7938 : cum.arg_count[(int) get_sh_arg_class (mode)]);
7941 /* Return true if arg of the specified mode should be passed in a register
7942 or false otherwise. */
7943 static bool
7944 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7945 const_tree type)
7947 /* FIXME: This used to be a macro and has been copy pasted into this
7948 function as is. Make this more readable. */
7949 return
7950 ((type == 0
7951 || (! TREE_ADDRESSABLE (type)
7952 && (! (TARGET_HITACHI || cum.renesas_abi)
7953 || ! (AGGREGATE_TYPE_P (type)
7954 || (!TARGET_FPU_ANY
7955 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7956 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7957 && ! cum.force_mem
7958 && (TARGET_SH2E
7959 ? ((mode) == BLKmode
7960 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7961 + int_size_in_bytes (type))
7962 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7963 : ((sh_round_reg (cum, mode)
7964 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
7965 <= NPARM_REGS (mode)))
7966 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7969 static int
7970 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
7971 tree type, bool named ATTRIBUTE_UNUSED)
7973 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7974 int words = 0;
7976 if (sh_pass_in_reg_p (*cum, mode, type)
7977 && !TARGET_FPU_DOUBLE
7978 && (sh_round_reg (*cum, mode)
7979 + (mode != BLKmode
7980 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
7981 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
7982 > NPARM_REGS (mode)))
7983 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
7985 return words * UNITS_PER_WORD;
7989 /* Define where to put the arguments to a function.
7990 Value is zero to push the argument on the stack,
7991 or a hard register in which to store the argument.
7993 MODE is the argument's machine mode.
7994 TYPE is the data type of the argument (as a tree).
7995 This is null for libcalls where that information may
7996 not be available.
7997 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7998 the preceding args and about the function being called.
7999 NAMED is nonzero if this argument is a named parameter
8000 (otherwise it is an extra parameter matching an ellipsis).
8002 On SH the first args are normally in registers
8003 and the rest are pushed. Any arg that starts within the first
8004 NPARM_REGS words is at least partially passed in a register unless
8005 its data type forbids. */
8006 static rtx
8007 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
8008 const_tree type, bool named)
8010 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8012 if (mode == VOIDmode)
8013 return ca->renesas_abi ? const1_rtx : const0_rtx;
8015 if (sh_pass_in_reg_p (*ca, mode, type)
8016 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8018 int regno;
8020 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8021 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8023 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8024 gen_rtx_REG (SFmode,
8025 BASE_ARG_REG (mode)
8026 + (sh_round_reg (*ca, mode) ^ 1)),
8027 const0_rtx);
8028 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8029 gen_rtx_REG (SFmode,
8030 BASE_ARG_REG (mode)
8031 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8032 GEN_INT (4));
8033 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8036 /* If the alignment of a DF value causes an SF register to be
8037 skipped, we will use that skipped register for the next SF
8038 value. */
8039 if ((TARGET_HITACHI || ca->renesas_abi)
8040 && ca->free_single_fp_reg
8041 && mode == SFmode)
8042 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8044 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8045 ^ (mode == SFmode && TARGET_SH4
8046 && TARGET_LITTLE_ENDIAN
8047 && ! TARGET_HITACHI && ! ca->renesas_abi);
8048 return gen_rtx_REG (mode, regno);
8052 return NULL_RTX;
8055 /* Update the data in CUM to advance over an argument
8056 of mode MODE and data type TYPE.
8057 (TYPE is null for libcalls where that information may not be
8058 available.) */
8059 static void
8060 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
8061 const_tree type, bool named ATTRIBUTE_UNUSED)
8063 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8065 if (ca->force_mem)
8066 ca->force_mem = false;
8068 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8070 /* Note that we've used the skipped register. */
8071 if (mode == SFmode && ca->free_single_fp_reg)
8073 ca->free_single_fp_reg = 0;
8074 return;
8076 /* When we have a DF after an SF, there's an SF register that get
8077 skipped in order to align the DF value. We note this skipped
8078 register, because the next SF value will use it, and not the
8079 SF that follows the DF. */
8080 if (mode == DFmode
8081 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8083 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8084 + BASE_ARG_REG (mode));
8088 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8089 || sh_pass_in_reg_p (*ca, mode, type))
8090 (ca->arg_count[(int) get_sh_arg_class (mode)]
8091 = (sh_round_reg (*ca, mode)
8092 + (mode == BLKmode
8093 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8094 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
8097 /* The Renesas calling convention doesn't quite fit into this scheme since
8098 the address is passed like an invisible argument, but one that is always
8099 passed in memory. */
8100 static rtx
8101 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8103 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8104 return NULL_RTX;
8105 return gen_rtx_REG (Pmode, 2);
8108 /* Worker function for TARGET_FUNCTION_VALUE.
8110 For the SH, this is like LIBCALL_VALUE, except that we must change the
8111 mode like PROMOTE_MODE does.
8112 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8113 tested here has to be kept in sync with the one in
8114 explow.c:promote_mode. */
8115 static rtx
8116 sh_function_value (const_tree valtype,
8117 const_tree fn_decl_or_type,
8118 bool outgoing ATTRIBUTE_UNUSED)
8120 if (fn_decl_or_type
8121 && !DECL_P (fn_decl_or_type))
8122 fn_decl_or_type = NULL;
8124 return gen_rtx_REG (
8125 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8126 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8127 && (TREE_CODE (valtype) == INTEGER_TYPE
8128 || TREE_CODE (valtype) == ENUMERAL_TYPE
8129 || TREE_CODE (valtype) == BOOLEAN_TYPE
8130 || TREE_CODE (valtype) == REAL_TYPE
8131 || TREE_CODE (valtype) == OFFSET_TYPE))
8132 && sh_promote_prototypes (fn_decl_or_type)
8133 ? SImode : TYPE_MODE (valtype)),
8134 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8137 /* Worker function for TARGET_LIBCALL_VALUE. */
8138 static rtx
8139 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8141 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8144 /* Return true if N is a possible register number of function value. */
8145 static bool
8146 sh_function_value_regno_p (const unsigned int regno)
8148 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8151 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8152 static bool
8153 sh_return_in_memory (const_tree type, const_tree fndecl)
8155 return TYPE_MODE (type) == BLKmode
8156 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8157 && TREE_CODE (type) == RECORD_TYPE);
8160 /* We actually emit the code in sh_expand_prologue. We used to use
8161 a static variable to flag that we need to emit this code, but that
8162 doesn't when inlining, when functions are deferred and then emitted
8163 later. Fortunately, we already have two flags that are part of struct
8164 function that tell if a function uses varargs or stdarg. */
8165 static void
8166 sh_setup_incoming_varargs (cumulative_args_t ca,
8167 machine_mode mode,
8168 tree type,
8169 int *pretend_arg_size,
8170 int second_time ATTRIBUTE_UNUSED)
8172 gcc_assert (cfun->stdarg);
8173 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8175 int named_parm_regs, anon_parm_regs;
8177 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
8178 + (mode == BLKmode
8179 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8180 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
8181 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8182 if (anon_parm_regs > 0)
8183 *pretend_arg_size = anon_parm_regs * 4;
8187 static bool
8188 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8190 return false;
8193 static bool
8194 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8196 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8198 return ! (TARGET_HITACHI || ca->renesas_abi);
8202 /* Define the offset between two registers, one to be eliminated, and
8203 the other its replacement, at the start of a routine. */
8205 initial_elimination_offset (int from, int to)
8207 const int regs_saved_rounding = 0;
8208 int save_flags = target_flags;
8209 HARD_REG_SET live_regs_mask;
8211 int regs_saved = calc_live_regs (&live_regs_mask);
8213 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8214 target_flags = save_flags;
8216 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8218 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8219 return total_saved_regs_space + total_auto_space;
8221 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8222 return total_saved_regs_space + total_auto_space;
8224 /* Initial gap between fp and sp is 0. */
8225 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8226 return 0;
8228 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8229 return rounded_frame_size (0);
8231 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8232 return rounded_frame_size (0);
8234 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8235 && (to == HARD_FRAME_POINTER_REGNUM
8236 || to == STACK_POINTER_REGNUM));
8237 return total_auto_space;
8240 /* Parse the -mfixed-range= option string. */
8241 void
8242 sh_fix_range (const char *const_str)
8244 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8245 REG2 are either register names or register numbers. The effect
8246 of this option is to mark the registers in the range from REG1 to
8247 REG2 as ``fixed'' so they won't be used by the compiler. */
8249 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8251 while (1)
8253 char* dash = strchr (str, '-');
8254 if (!dash)
8256 warning (0, "value of -mfixed-range must have form REG1-REG2");
8257 return;
8259 *dash = '\0';
8260 char* comma = strchr (dash + 1, ',');
8261 if (comma)
8262 *comma = '\0';
8264 int first = decode_reg_name (str);
8265 if (first < 0)
8267 warning (0, "unknown register name: %s", str);
8268 return;
8271 int last = decode_reg_name (dash + 1);
8272 if (last < 0)
8274 warning (0, "unknown register name: %s", dash + 1);
8275 return;
8278 *dash = '-';
8280 if (first > last)
8282 warning (0, "%s-%s is an empty range", str, dash + 1);
8283 return;
8286 for (int i = first; i <= last; ++i)
8287 fixed_regs[i] = call_used_regs[i] = 1;
8289 if (!comma)
8290 break;
8292 *comma = ',';
8293 str = comma + 1;
8297 /* Insert any deferred function attributes from earlier pragmas. */
8298 static void
8299 sh_insert_attributes (tree node, tree *attributes)
8301 if (TREE_CODE (node) != FUNCTION_DECL)
8302 return;
8304 /* We are only interested in fields. */
8305 if (!DECL_P (node))
8306 return;
8308 /* Append the attributes to the deferred attributes. */
8309 *sh_deferred_function_attributes_tail = *attributes;
8310 tree attrs = sh_deferred_function_attributes;
8311 if (!attrs)
8312 return;
8314 /* Some attributes imply or require the interrupt attribute. */
8315 if (!lookup_attribute ("interrupt_handler", attrs)
8316 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8318 /* If we have a trapa_handler, but no interrupt_handler attribute,
8319 insert an interrupt_handler attribute. */
8320 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8321 /* We can't use sh_pr_interrupt here because that's not in the
8322 java frontend. */
8323 attrs
8324 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8325 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8326 if the interrupt attribute is missing, we ignore the attribute
8327 and warn. */
8328 else if (lookup_attribute ("sp_switch", attrs)
8329 || lookup_attribute ("trap_exit", attrs)
8330 || lookup_attribute ("nosave_low_regs", attrs)
8331 || lookup_attribute ("resbank", attrs))
8333 tree *tail;
8335 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8337 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8338 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8339 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8340 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8341 warning (OPT_Wattributes,
8342 "%qE attribute only applies to interrupt functions",
8343 TREE_PURPOSE (attrs));
8344 else
8346 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8347 NULL_TREE);
8348 tail = &TREE_CHAIN (*tail);
8351 attrs = *attributes;
8355 /* Install the processed list. */
8356 *attributes = attrs;
8358 /* Clear deferred attributes. */
8359 sh_deferred_function_attributes = NULL_TREE;
8360 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8362 return;
8365 /*------------------------------------------------------------------------------
8366 Target specific attributes
8367 Supported attributes are:
8369 * interrupt_handler
8370 Specifies this function is an interrupt handler.
8372 * trapa_handler
8373 Like interrupt_handler, but don't save all registers.
8375 * sp_switch
8376 Specifies an alternate stack for an interrupt handler to run on.
8378 * trap_exit
8379 Use a trapa to exit an interrupt function instead of rte.
8381 * nosave_low_regs
8382 Don't save r0..r7 in an interrupt handler function.
8383 This is useful on SH3* and SH4*, which have a separate set of low
8384 regs for user and privileged modes.
8385 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8386 those that run with interrupts disabled and thus can't be
8387 interrupted thenselves).
8389 * renesas
8390 Use Renesas calling/layout conventions (functions and structures).
8392 * resbank
8393 In case of an interrupt handler function, use a register bank to
8394 save registers R0-R14, MACH, MACL, GBR and PR.
8395 This is available only on SH2A targets.
8397 * function_vector
8398 Declares a function to be called using the TBR relative addressing
8399 mode. Takes an argument that specifies the slot number in the table
8400 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
8403 /* Handle a 'resbank' attribute. */
8404 static tree
8405 sh_handle_resbank_handler_attribute (tree * node, tree name,
8406 tree args ATTRIBUTE_UNUSED,
8407 int flags ATTRIBUTE_UNUSED,
8408 bool * no_add_attrs)
8410 if (!TARGET_SH2A)
8412 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8413 name);
8414 *no_add_attrs = true;
8416 if (TREE_CODE (*node) != FUNCTION_DECL)
8418 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8419 name);
8420 *no_add_attrs = true;
8423 return NULL_TREE;
8426 /* Handle an "interrupt_handler" attribute; arguments as in
8427 struct attribute_spec.handler. */
8428 static tree
8429 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8430 tree args ATTRIBUTE_UNUSED,
8431 int flags ATTRIBUTE_UNUSED,
8432 bool *no_add_attrs)
8434 if (TREE_CODE (*node) != FUNCTION_DECL)
8436 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8437 name);
8438 *no_add_attrs = true;
8441 return NULL_TREE;
8444 /* Handle an 'function_vector' attribute; arguments as in
8445 struct attribute_spec.handler. */
8446 static tree
8447 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8448 tree args ATTRIBUTE_UNUSED,
8449 int flags ATTRIBUTE_UNUSED,
8450 bool * no_add_attrs)
8452 if (!TARGET_SH2A)
8454 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8455 name);
8456 *no_add_attrs = true;
8458 else if (TREE_CODE (*node) != FUNCTION_DECL)
8460 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8461 name);
8462 *no_add_attrs = true;
8464 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8466 /* The argument must be a constant integer. */
8467 warning (OPT_Wattributes,
8468 "%qE attribute argument not an integer constant",
8469 name);
8470 *no_add_attrs = true;
8472 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8474 /* The argument value must be between 0 to 255. */
8475 warning (OPT_Wattributes,
8476 "%qE attribute argument should be between 0 to 255",
8477 name);
8478 *no_add_attrs = true;
8480 return NULL_TREE;
8483 /* Returns true if current function has been assigned the attribute
8484 'function_vector'. */
8485 bool
8486 sh2a_is_function_vector_call (rtx x)
8488 if (GET_CODE (x) == SYMBOL_REF
8489 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8491 tree tr = SYMBOL_REF_DECL (x);
8493 if (sh2a_function_vector_p (tr))
8494 return true;
8497 return false;
8500 /* Returns the function vector number, if the attribute
8501 'function_vector' is assigned, otherwise returns zero. */
8503 sh2a_get_function_vector_number (rtx x)
8505 if ((GET_CODE (x) == SYMBOL_REF)
8506 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8508 tree t = SYMBOL_REF_DECL (x);
8510 if (TREE_CODE (t) != FUNCTION_DECL)
8511 return 0;
8513 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8514 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8515 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8517 return 0;
8519 else
8520 return 0;
8523 /* Handle an "sp_switch" attribute; arguments as in
8524 struct attribute_spec.handler. */
8525 static tree
8526 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8527 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8529 if (TREE_CODE (*node) != FUNCTION_DECL)
8531 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8532 name);
8533 *no_add_attrs = true;
8535 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8537 /* The argument must be a constant string. */
8538 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8539 name);
8540 *no_add_attrs = true;
8543 return NULL_TREE;
8546 /* Handle an "trap_exit" attribute; arguments as in
8547 struct attribute_spec.handler. */
8548 static tree
8549 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8550 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8552 if (TREE_CODE (*node) != FUNCTION_DECL)
8554 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8555 name);
8556 *no_add_attrs = true;
8558 /* The argument specifies a trap number to be used in a trapa instruction
8559 at function exit (instead of an rte instruction). */
8560 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8562 /* The argument must be a constant integer. */
8563 warning (OPT_Wattributes, "%qE attribute argument not an "
8564 "integer constant", name);
8565 *no_add_attrs = true;
8568 return NULL_TREE;
8571 static tree
8572 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8573 tree name ATTRIBUTE_UNUSED,
8574 tree args ATTRIBUTE_UNUSED,
8575 int flags ATTRIBUTE_UNUSED,
8576 bool *no_add_attrs ATTRIBUTE_UNUSED)
8578 return NULL_TREE;
8581 /* True if __attribute__((renesas)) or -mrenesas. */
8582 bool
8583 sh_attr_renesas_p (const_tree td)
8585 if (TARGET_HITACHI)
8586 return true;
8587 if (td == NULL_TREE)
8588 return false;
8589 if (DECL_P (td))
8590 td = TREE_TYPE (td);
8591 if (td == error_mark_node)
8592 return false;
8593 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8596 /* True if __attribute__((renesas)) or -mrenesas, for the current
8597 function. */
8598 bool
8599 sh_cfun_attr_renesas_p (void)
8601 return sh_attr_renesas_p (current_function_decl);
8604 /* Returns true if the current function has the "interrupt_handler"
8605 attribute set. */
8606 bool
8607 sh_cfun_interrupt_handler_p (void)
8609 return (lookup_attribute ("interrupt_handler",
8610 DECL_ATTRIBUTES (current_function_decl))
8611 != NULL_TREE);
8614 /* Returns true if FUNC has been assigned the attribute
8615 "function_vector". */
8616 bool
8617 sh2a_function_vector_p (tree func)
8619 if (TREE_CODE (func) != FUNCTION_DECL)
8620 return false;
8622 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8623 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8624 return true;
8626 return false;
8629 /* Returns true if given tree has the "resbank" attribute set. */
8630 bool
8631 sh_cfun_resbank_handler_p (void)
8633 return ((lookup_attribute ("resbank",
8634 DECL_ATTRIBUTES (current_function_decl))
8635 != NULL_TREE)
8636 && (lookup_attribute ("interrupt_handler",
8637 DECL_ATTRIBUTES (current_function_decl))
8638 != NULL_TREE) && TARGET_SH2A);
8641 /* Returns true if the current function has a "trap_exit" attribute set. */
8642 bool
8643 sh_cfun_trap_exit_p (void)
8645 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8646 != NULL_TREE;
8649 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8650 static const char *
8651 sh_check_pch_target_flags (int old_flags)
8653 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8654 | MASK_SH_E | MASK_HARD_SH4
8655 | MASK_FPU_SINGLE | MASK_SH4))
8656 return _("created and used with different architectures / ABIs");
8657 if ((old_flags ^ target_flags) & MASK_HITACHI)
8658 return _("created and used with different ABIs");
8659 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8660 return _("created and used with different endianness");
8661 return NULL;
8664 /* Predicates used by the templates. */
8666 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8667 Used only in general_movsrc_operand. */
8668 bool
8669 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8671 switch (REGNO (op))
8673 case PR_REG:
8674 case MACL_REG:
8675 case MACH_REG:
8676 return true;
8678 return false;
8681 /* Returns true if OP is a floating point value with value 0.0. */
8682 bool
8683 fp_zero_operand (rtx op)
8685 if (GET_MODE (op) != SFmode)
8686 return false;
8688 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8689 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8692 /* Returns true if OP is a floating point value with value 1.0. */
8693 bool
8694 fp_one_operand (rtx op)
8696 if (GET_MODE (op) != SFmode)
8697 return false;
8699 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8702 /* Return the TLS type for TLS symbols. */
8703 enum tls_model
8704 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8706 if (GET_CODE (op) != SYMBOL_REF)
8707 return TLS_MODEL_NONE;
8708 return SYMBOL_REF_TLS_MODEL (op);
8711 /* Return the destination address of a branch. */
8712 static int
8713 branch_dest (rtx branch)
8715 rtx dest = SET_SRC (PATTERN (branch));
8717 if (GET_CODE (dest) == IF_THEN_ELSE)
8718 dest = XEXP (dest, 1);
8720 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8723 /* Return nonzero if REG is not used after INSN.
8724 We assume REG is a reload reg, and therefore does
8725 not live past labels. It may live past calls or jumps though. */
8726 bool
8727 reg_unused_after (rtx reg, rtx_insn *insn)
8729 /* If the reg is set by this instruction, then it is safe for our
8730 case. Disregard the case where this is a store to memory, since
8731 we are checking a register used in the store address. */
8732 rtx set = single_set (insn);
8733 if (set && !MEM_P (SET_DEST (set))
8734 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8735 return true;
8737 while ((insn = NEXT_INSN (insn)))
8739 if (!INSN_P (insn))
8740 continue;
8742 rtx_code code = GET_CODE (insn);
8744 #if 0
8745 /* If this is a label that existed before reload, then the register
8746 is dead here. However, if this is a label added by reorg, then
8747 the register may still be live here. We can't tell the difference,
8748 so we just ignore labels completely. */
8749 if (code == CODE_LABEL)
8750 return 1;
8751 /* else */
8752 #endif
8754 if (code == JUMP_INSN)
8755 return false;
8757 /* If this is a sequence, we must handle them all at once.
8758 We could have for instance a call that sets the target register,
8759 and an insn in a delay slot that uses the register. In this case,
8760 we must return 0. */
8761 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8763 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8764 bool retval = false;
8766 for (int i = 0; i < seq->len (); i++)
8768 rtx_insn *this_insn = seq->insn (i);
8769 rtx set = single_set (this_insn);
8771 if (CALL_P (this_insn))
8772 code = CALL_INSN;
8773 else if (JUMP_P (this_insn))
8775 if (INSN_ANNULLED_BRANCH_P (this_insn))
8776 return false;
8777 code = JUMP_INSN;
8780 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8781 return false;
8782 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8784 if (!MEM_P (SET_DEST (set)))
8785 retval = true;
8786 else
8787 return false;
8789 if (set == NULL_RTX
8790 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8791 return false;
8793 if (retval)
8794 return true;
8795 else if (code == JUMP_INSN)
8796 return false;
8799 rtx set = single_set (insn);
8800 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8801 return false;
8802 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8803 return !MEM_P (SET_DEST (set));
8804 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8805 return false;
8807 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8808 return true;
8810 return true;
8814 static GTY(()) rtx t_reg_rtx;
8816 get_t_reg_rtx (void)
8818 if (! t_reg_rtx)
8819 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8820 return t_reg_rtx;
8823 static GTY(()) tree fpscr_values;
8825 static void
8826 emit_fpu_switch (rtx scratch, int index)
8828 if (fpscr_values == NULL)
8830 tree t = build_index_type (integer_one_node);
8831 t = build_array_type (integer_type_node, t);
8832 t = build_decl (BUILTINS_LOCATION,
8833 VAR_DECL, get_identifier ("__fpscr_values"), t);
8834 DECL_ARTIFICIAL (t) = 1;
8835 DECL_IGNORED_P (t) = 1;
8836 DECL_EXTERNAL (t) = 1;
8837 TREE_STATIC (t) = 1;
8838 TREE_PUBLIC (t) = 1;
8839 TREE_USED (t) = 1;
8841 fpscr_values = t;
8844 rtx src = DECL_RTL (fpscr_values);
8845 if (!can_create_pseudo_p ())
8847 emit_move_insn (scratch, XEXP (src, 0));
8848 if (index != 0)
8849 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8850 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8852 else
8853 src = adjust_address (src, SImode, index * 4);
8855 emit_insn (gen_lds_fpscr (src));
8858 static rtx get_free_reg (HARD_REG_SET);
8860 /* This function returns a register to use to load the address to load
8861 the fpscr from. Currently it always returns r1 or r7, but when we are
8862 able to use pseudo registers after combine, or have a better mechanism
8863 for choosing a register, it should be done here. */
8864 /* REGS_LIVE is the liveness information for the point for which we
8865 need this allocation. In some bare-bones exit blocks, r1 is live at the
8866 start. We can even have all of r0..r3 being live:
8867 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8868 INSN before which new insns are placed with will clobber the register
8869 we return. If a basic block consists only of setting the return value
8870 register to a pseudo and using that register, the return value is not
8871 live before or after this block, yet we we'll insert our insns right in
8872 the middle. */
8873 static rtx
8874 get_free_reg (HARD_REG_SET regs_live)
8876 if (! TEST_HARD_REG_BIT (regs_live, 1))
8877 return gen_rtx_REG (Pmode, 1);
8879 /* Hard reg 1 is live; since this is a small register classes target,
8880 there shouldn't be anything but a jump before the function end. */
8881 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8882 return gen_rtx_REG (Pmode, 7);
8885 /* This function will set the fpscr from memory.
8886 MODE is the mode we are setting it to. */
8887 void
8888 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8890 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8891 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8893 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8894 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8897 /* Is the given character a logical line separator for the assembler? */
8898 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8899 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8900 #endif
8902 static bool
8903 sequence_insn_p (rtx_insn *insn)
8905 rtx_insn* prev = PREV_INSN (insn);
8906 if (prev == NULL)
8907 return false;
8909 rtx_insn* next = NEXT_INSN (prev);
8910 if (next == NULL)
8911 return false;
8913 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8917 sh_insn_length_adjustment (rtx_insn *insn)
8919 /* Instructions with unfilled delay slots take up an extra two bytes for
8920 the nop in the delay slot. */
8921 if (((NONJUMP_INSN_P (insn)
8922 && GET_CODE (PATTERN (insn)) != USE
8923 && GET_CODE (PATTERN (insn)) != CLOBBER)
8924 || CALL_P (insn) || JUMP_P (insn))
8925 && ! sequence_insn_p (insn)
8926 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8927 return 2;
8929 /* Increase the insn length of a cbranch without a delay slot insn to
8930 force a delay slot which will be stuffed with a nop. */
8931 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8932 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8933 && ! sequence_insn_p (insn))
8934 return 2;
8936 /* sh-dsp parallel processing insn take four bytes instead of two. */
8938 if (NONJUMP_INSN_P (insn))
8940 int sum = 0;
8941 rtx body = PATTERN (insn);
8942 const char *templ;
8943 char c;
8944 bool maybe_label = true;
8946 if (GET_CODE (body) == ASM_INPUT)
8947 templ = XSTR (body, 0);
8948 else if (asm_noperands (body) >= 0)
8949 templ
8950 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8951 else
8952 return 0;
8955 int ppi_adjust = 0;
8958 c = *templ++;
8959 while (c == ' ' || c == '\t');
8960 /* all sh-dsp parallel-processing insns start with p.
8961 The only non-ppi sh insn starting with p is pref.
8962 The only ppi starting with pr is prnd. */
8963 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8964 ppi_adjust = 2;
8965 /* The repeat pseudo-insn expands two three insns, a total of
8966 six bytes in size. */
8967 else if ((c == 'r' || c == 'R')
8968 && ! strncasecmp ("epeat", templ, 5))
8969 ppi_adjust = 4;
8970 while (c && c != '\n'
8971 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8973 /* If this is a label, it is obviously not a ppi insn. */
8974 if (c == ':' && maybe_label)
8976 ppi_adjust = 0;
8977 break;
8979 else if (c == '\'' || c == '"')
8980 maybe_label = false;
8981 c = *templ++;
8983 sum += ppi_adjust;
8984 maybe_label = c != ':';
8986 while (c);
8987 return sum;
8989 return 0;
8992 /* Return TRUE for a valid displacement for the REG+disp addressing
8993 with MODE. */
8994 bool
8995 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
8996 bool allow_zero)
8998 if (! CONST_INT_P (op))
8999 return false;
9002 const HOST_WIDE_INT offset = INTVAL (op);
9003 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9004 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9006 /* If the mode does not support any displacement always return false.
9007 Even though an index of '0' is actually always valid, it will cause
9008 troubles when e.g. a DFmode move is split into two SFmode moves,
9009 where one SFmode move will have index '0' and the other move will
9010 have index '4'. */
9011 if (!allow_zero && max_disp < 1)
9012 return false;
9014 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9018 /* Recognize an RTL expression that is a valid memory address for
9019 an instruction.
9020 The MODE argument is the machine mode for the MEM expression
9021 that wants to use this address.
9022 Allow REG
9023 REG+disp
9024 REG+r0
9025 REG++
9026 --REG
9028 GBR+disp */
9029 static bool
9030 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9032 if (REG_P (x) && REGNO (x) == GBR_REG)
9033 return true;
9035 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9036 return true;
9037 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9038 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9039 return true;
9040 else if (GET_CODE (x) == PLUS)
9042 rtx xop0 = XEXP (x, 0);
9043 rtx xop1 = XEXP (x, 1);
9045 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9046 return gbr_displacement (xop1, mode);
9048 if (GET_MODE_SIZE (mode) <= 8
9049 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9050 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9051 return true;
9053 if (GET_MODE_SIZE (mode) <= 4
9054 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9056 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9057 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9058 return true;
9059 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9060 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9061 return true;
9065 return false;
9068 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9069 isn't protected by a PIC unspec. */
9070 bool
9071 nonpic_symbol_mentioned_p (rtx x)
9073 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9074 || GET_CODE (x) == PC)
9075 return true;
9077 /* We don't want to look into the possible MEM location of a
9078 CONST_DOUBLE, since we're not going to use it, in general. */
9079 if (GET_CODE (x) == CONST_DOUBLE)
9080 return false;
9082 if (GET_CODE (x) == UNSPEC
9083 && (XINT (x, 1) == UNSPEC_PIC
9084 || XINT (x, 1) == UNSPEC_GOT
9085 || XINT (x, 1) == UNSPEC_GOTOFF
9086 || XINT (x, 1) == UNSPEC_GOTPLT
9087 || XINT (x, 1) == UNSPEC_GOTTPOFF
9088 || XINT (x, 1) == UNSPEC_DTPOFF
9089 || XINT (x, 1) == UNSPEC_TPOFF
9090 || XINT (x, 1) == UNSPEC_PLT
9091 || XINT (x, 1) == UNSPEC_PCREL
9092 || XINT (x, 1) == UNSPEC_SYMOFF
9093 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9094 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9095 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9096 return false;
9098 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9099 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9101 if (fmt[i] == 'E')
9103 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9104 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9105 return true;
9107 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9108 return true;
9111 return false;
9114 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9115 @GOTOFF in `reg'. */
9117 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9119 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9120 return orig;
9122 if (GET_CODE (orig) == LABEL_REF
9123 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9125 if (reg == NULL_RTX)
9126 reg = gen_reg_rtx (Pmode);
9128 if (TARGET_FDPIC
9129 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9131 /* Weak functions may be NULL which doesn't work with
9132 GOTOFFFUNCDESC because the runtime offset is not known. */
9133 if (SYMBOL_REF_WEAK (orig))
9134 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9135 else
9136 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9138 else if (TARGET_FDPIC
9139 && (GET_CODE (orig) == LABEL_REF
9140 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9141 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9142 || SYMBOL_REF_EXTERNAL_P (orig)
9143 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
9144 /* In FDPIC, GOTOFF can only be used for writable data. */
9145 emit_insn (gen_symGOT2reg (reg, orig));
9146 else
9147 emit_insn (gen_symGOTOFF2reg (reg, orig));
9148 return reg;
9150 else if (GET_CODE (orig) == SYMBOL_REF)
9152 if (reg == NULL_RTX)
9153 reg = gen_reg_rtx (Pmode);
9155 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9156 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9157 else
9158 emit_insn (gen_symGOT2reg (reg, orig));
9159 return reg;
9161 return orig;
9164 /* Given a (logical) mode size and an offset in bytes, try to find a the
9165 appropriate displacement value for a mov insn. On SH the displacements
9166 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9167 15 bytes in QImode. To compensate this we create a new base address by
9168 adding an adjustment value to it.
9170 If the originally requested offset is greater than 127 we prefer using
9171 values 124..127 over 128..131 to increase opportunities to use the
9172 add #imm, Rn insn.
9174 In some cases it is possible that a requested offset might seem unaligned
9175 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9176 This is compensated by adjusting the base address so that the effective
9177 address of the displacement move insn will be aligned.
9179 This is not the best possible way of rebasing the base address, as it
9180 does not look at other present displacement addressings around it.
9181 In some cases this can create more base address adjustments than would
9182 actually be necessary. */
9183 struct disp_adjust
9185 rtx offset_adjust;
9186 rtx mov_disp;
9189 static struct disp_adjust
9190 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9192 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9194 /* Do not try to use SH2A's large displacements here, because this would
9195 effectively disable the small displacement insns. */
9196 const int mode_sz = GET_MODE_SIZE (mode);
9197 const int mov_insn_sz = mov_insn_size (mode, false);
9198 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9199 const int max_disp_next = max_disp + mov_insn_sz;
9200 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9201 HOST_WIDE_INT offset_adjust;
9203 /* In some cases this actually does happen and we must check for it. */
9204 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9205 return res;
9207 /* Keeps the previous behavior for QImode displacement addressing.
9208 This just decides how the offset is re-based. Removing this special
9209 case will result in slightly bigger code on average, but it's not that
9210 bad actually. */
9211 if (mov_insn_sz == 1)
9212 align_modifier = 0;
9214 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9216 if (mode_sz + offset - offset_adjust <= max_disp_next)
9218 res.offset_adjust = GEN_INT (offset_adjust);
9219 res.mov_disp = GEN_INT (offset - offset_adjust);
9222 return res;
9225 /* Try to modify an illegitimate address and make it legitimate.
9226 If we find one, return the new, valid address.
9227 Otherwise, return the original address. */
9228 static rtx
9229 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9231 if (flag_pic)
9232 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9234 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9235 || (TARGET_SH2E && mode == SFmode))
9236 return x;
9238 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9239 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9241 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9242 INTVAL (XEXP (x, 1)));
9244 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9246 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9247 adj.offset_adjust, NULL_RTX, 0,
9248 OPTAB_LIB_WIDEN);
9249 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9252 return x;
9255 /* Attempt to replace *p, which is an address that needs reloading, with
9256 a valid memory address for an operand of mode MODE.
9257 Like for sh_legitimize_address, for the SH we try to get a normal form
9258 of the address. That will allow inheritance of the address reloads. */
9259 bool
9260 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9261 int itype)
9263 enum reload_type type = (enum reload_type) itype;
9264 const int mode_sz = GET_MODE_SIZE (mode);
9266 if (sh_lra_p ())
9267 return false;
9269 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9270 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9272 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9273 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9275 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9277 push_reload (*p, NULL_RTX, p, NULL,
9278 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9279 return true;
9282 if (TARGET_SH2E && mode == SFmode)
9284 *p = copy_rtx (*p);
9285 push_reload (*p, NULL_RTX, p, NULL,
9286 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9287 return true;
9290 /* FIXME: Do not allow to legitimize QImode and HImode displacement
9291 moves because then reload has a problem figuring the constraint
9292 that the move insn target/source reg must be R0.
9293 Or maybe some handling is wrong in sh_secondary_reload for this
9294 to work properly? */
9295 if ((mode_sz == 4 || mode_sz == 8)
9296 && ! (TARGET_SH4 && mode == DFmode)
9297 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9299 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9300 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9301 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9302 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9303 return true;
9307 /* We must re-recognize what we created before. */
9308 if (GET_CODE (*p) == PLUS
9309 && (mode_sz == 4 || mode_sz == 8)
9310 && GET_CODE (XEXP (*p, 0)) == PLUS
9311 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9312 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9313 && CONST_INT_P (XEXP (*p, 1))
9314 && ! (TARGET_SH2E && mode == SFmode))
9316 /* Because this address is so complex, we know it must have
9317 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9318 it is already unshared, and needs no further unsharing. */
9319 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9320 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9321 return true;
9324 return false;
9327 /* In the name of slightly smaller debug output, and to cater to
9328 general assembler lossage, recognize various UNSPEC sequences
9329 and turn them back into a direct symbol reference. */
9330 static rtx
9331 sh_delegitimize_address (rtx orig_x)
9333 orig_x = delegitimize_mem_from_attrs (orig_x);
9335 rtx x = orig_x;
9336 if (MEM_P (x))
9337 x = XEXP (x, 0);
9338 if (GET_CODE (x) == CONST)
9340 rtx y = XEXP (x, 0);
9341 if (GET_CODE (y) == UNSPEC)
9343 if (XINT (y, 1) == UNSPEC_GOT
9344 || XINT (y, 1) == UNSPEC_GOTOFF
9345 || XINT (y, 1) == UNSPEC_SYMOFF)
9346 return XVECEXP (y, 0, 0);
9347 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9349 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9351 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9353 if (GET_CODE (symplt) == UNSPEC
9354 && (XINT (symplt, 1) == UNSPEC_PLT
9355 || XINT (symplt, 1) == UNSPEC_PCREL))
9356 return XVECEXP (symplt, 0, 0);
9362 return orig_x;
9365 /* Mark the use of a constant in the literal table. If the constant
9366 has multiple labels, make it unique. */
9367 static rtx
9368 mark_constant_pool_use (rtx x)
9370 if (x == NULL_RTX)
9371 return x;
9373 switch (GET_CODE (x))
9375 case LABEL_REF:
9376 x = XEXP (x, 0);
9377 case CODE_LABEL:
9378 break;
9379 default:
9380 return x;
9383 /* Get the first label in the list of labels for the same constant
9384 and delete another labels in the list. */
9385 rtx_insn* lab = as_a <rtx_insn*> (x);
9386 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9388 if (!LABEL_P (insn)
9389 || LABEL_REFS (insn) != NEXT_INSN (insn))
9390 break;
9391 lab = insn;
9394 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9395 as_a<rtx_insn *> (insn)->set_deleted ();
9397 /* Mark constants in a window. */
9398 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9399 insn = NEXT_INSN (insn))
9401 if (!NONJUMP_INSN_P (insn))
9402 continue;
9404 rtx pattern = PATTERN (insn);
9405 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9406 continue;
9408 switch (XINT (pattern, 1))
9410 case UNSPECV_CONST2:
9411 case UNSPECV_CONST4:
9412 case UNSPECV_CONST8:
9413 XVECEXP (pattern, 0, 1) = const1_rtx;
9414 break;
9415 case UNSPECV_WINDOW_END:
9416 if (XVECEXP (pattern, 0, 0) == x)
9417 return lab;
9418 break;
9419 case UNSPECV_CONST_END:
9420 return lab;
9421 default:
9422 break;
9426 return lab;
9429 /* Return true if it's possible to redirect BRANCH1 to the destination
9430 of an unconditional jump BRANCH2. We only want to do this if the
9431 resulting branch will have a short displacement. */
9432 static bool
9433 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9435 /* Don't follow if BRANCH2 is possible to be a jump crossing between
9436 hot and cold partitions. */
9437 if (flag_reorder_blocks_and_partition
9438 && simplejump_p (branch2)
9439 && CROSSING_JUMP_P (branch2))
9440 return false;
9442 if (flag_expensive_optimizations && simplejump_p (branch2))
9444 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9445 rtx_insn *insn;
9446 int distance;
9448 for (distance = 0, insn = NEXT_INSN (branch1);
9449 insn && distance < 256;
9450 insn = PREV_INSN (insn))
9452 if (insn == dest)
9453 return true;
9454 else
9455 distance += get_attr_length (insn);
9457 for (distance = 0, insn = NEXT_INSN (branch1);
9458 insn && distance < 256;
9459 insn = NEXT_INSN (insn))
9461 if (insn == dest)
9462 return true;
9463 else
9464 distance += get_attr_length (insn);
9467 return false;
9470 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9471 bool
9472 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9473 unsigned int new_reg)
9475 /* Interrupt functions can only use registers that have already been
9476 saved by the prologue, even if they would normally be
9477 call-clobbered. */
9478 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9479 return false;
9481 return true;
9484 /* Function to update the integer COST
9485 based on the relationship between INSN that is dependent on
9486 DEP_INSN through the dependence LINK. The default is to make no
9487 adjustment to COST. This can be used for example to specify to
9488 the scheduler that an output- or anti-dependence does not incur
9489 the same cost as a data-dependence. The return value should be
9490 the new value for COST. */
9491 static int
9492 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9493 unsigned int)
9495 rtx reg, use_pat;
9497 if (dep_type == 0)
9499 if (recog_memoized (insn) < 0
9500 || recog_memoized (dep_insn) < 0)
9501 return cost;
9503 rtx dep_set = single_set (dep_insn);
9505 /* The latency that we specify in the scheduling description refers
9506 to the actual output, not to an auto-increment register; for that,
9507 the latency is one. */
9508 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9510 rtx set = single_set (insn);
9512 if (set
9513 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9514 && (!MEM_P (SET_DEST (set))
9515 || !reg_mentioned_p (SET_DEST (dep_set),
9516 XEXP (SET_DEST (set), 0))))
9517 cost = 1;
9519 /* The only input for a call that is timing-critical is the
9520 function's address. */
9521 if (CALL_P (insn))
9523 rtx call = get_call_rtx_from (insn);
9524 if (call
9525 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9526 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9527 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9528 cost -= TARGET_SH4_300 ? 3 : 6;
9530 /* Likewise, the most timing critical input for an sfuncs call
9531 is the function address. However, sfuncs typically start
9532 using their arguments pretty quickly.
9533 Assume a four cycle delay for SH4 before they are needed.
9534 Cached ST40-300 calls are quicker, so assume only a one
9535 cycle delay there.
9536 ??? Maybe we should encode the delays till input registers
9537 are needed by sfuncs into the sfunc call insn. */
9538 /* All sfunc calls are parallels with at least four components.
9539 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9540 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9541 && XVECLEN (PATTERN (insn), 0) >= 4
9542 && (reg = sfunc_uses_reg (insn)))
9544 if (! reg_set_p (reg, dep_insn))
9545 cost -= TARGET_SH4_300 ? 1 : 4;
9547 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9549 attr_type dep_type = get_attr_type (dep_insn);
9550 attr_type type;
9551 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9552 cost--;
9553 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9554 && (type = get_attr_type (insn)) != TYPE_CALL
9555 && type != TYPE_SFUNC)
9556 cost--;
9557 /* When the preceding instruction loads the shift amount of
9558 the following SHAD/SHLD, the latency of the load is increased
9559 by 1 cycle. */
9560 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9561 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9562 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9563 XEXP (SET_SRC (single_set (insn)),
9564 1)))
9565 cost++;
9566 /* When an LS group instruction with a latency of less than
9567 3 cycles is followed by a double-precision floating-point
9568 instruction, FIPR, or FTRV, the latency of the first
9569 instruction is increased to 3 cycles. */
9570 else if (cost < 3
9571 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9572 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9573 cost = 3;
9574 /* The lsw register of a double-precision computation is ready one
9575 cycle earlier. */
9576 else if (reload_completed
9577 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9578 && (use_pat = single_set (insn))
9579 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9580 SET_SRC (use_pat)))
9581 cost -= 1;
9583 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9584 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9585 cost -= 1;
9587 else if (TARGET_SH4_300)
9589 /* Stores need their input register two cycles later. */
9590 attr_type type;
9591 if (dep_set && cost >= 1
9592 && ((type = get_attr_type (insn)) == TYPE_STORE
9593 || type == TYPE_PSTORE
9594 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9596 rtx set = single_set (insn);
9598 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9599 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9601 cost -= 2;
9602 /* But don't reduce the cost below 1 if the address depends
9603 on a side effect of dep_insn. */
9604 if (cost < 1
9605 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9606 cost = 1;
9611 /* An anti-dependence penalty of two applies if the first insn is a double
9612 precision fadd / fsub / fmul. */
9613 else if (!TARGET_SH4_300
9614 && dep_type == REG_DEP_ANTI
9615 && recog_memoized (dep_insn) >= 0
9616 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9617 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9618 /* A lot of alleged anti-flow dependences are fake,
9619 so check this one is real. */
9620 && flow_dependent_p (dep_insn, insn))
9621 cost = 2;
9623 return cost;
9626 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9627 if DEP_INSN is anti-flow dependent on INSN. */
9628 static bool
9629 flow_dependent_p (rtx insn, rtx dep_insn)
9631 rtx tmp = PATTERN (insn);
9633 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9634 return tmp == NULL_RTX;
9637 /* A helper function for flow_dependent_p called through note_stores. */
9638 static void
9639 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9641 rtx * pinsn = (rtx *) data;
9643 if (*pinsn && reg_referenced_p (x, *pinsn))
9644 *pinsn = NULL_RTX;
9647 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9648 'special function' patterns (type sfunc) that clobber pr, but that
9649 do not look like function calls to leaf_function_p. Hence we must
9650 do this extra check. */
9651 static int
9652 sh_pr_n_sets (void)
9654 return DF_REG_DEF_COUNT (PR_REG);
9657 /* Return where to allocate pseudo for a given hard register initial
9658 value. */
9659 static rtx
9660 sh_allocate_initial_value (rtx hard_reg)
9662 if (REGNO (hard_reg) == PR_REG)
9664 if (crtl->is_leaf && ! sh_pr_n_sets ())
9665 return hard_reg;
9666 else
9667 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9670 return NULL_RTX;
9673 /* This function returns "2" to indicate dual issue for the SH4
9674 processor. To be used by the DFA pipeline description. */
9675 static int
9676 sh_issue_rate (void)
9678 if (TARGET_SUPERSCALAR)
9679 return 2;
9680 else
9681 return 1;
9684 /* Functions for ready queue reordering for sched1. */
9686 /* Get weight for mode for a set x. */
9687 static short
9688 find_set_regmode_weight (rtx x, machine_mode mode)
9690 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9691 return 1;
9692 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9694 if (REG_P (SET_DEST (x)))
9696 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9697 return 1;
9698 else
9699 return 0;
9701 return 1;
9703 return 0;
9706 /* Get regmode weight for insn. */
9707 static short
9708 find_insn_regmode_weight (rtx insn, machine_mode mode)
9710 /* Increment weight for each register born here. */
9711 rtx x = PATTERN (insn);
9712 short reg_weight = find_set_regmode_weight (x, mode);
9713 if (GET_CODE (x) == PARALLEL)
9715 int j;
9716 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9718 x = XVECEXP (PATTERN (insn), 0, j);
9719 reg_weight += find_set_regmode_weight (x, mode);
9722 /* Decrement weight for each register that dies here. */
9723 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9725 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9727 rtx note = XEXP (x, 0);
9728 if (REG_P (note) && GET_MODE (note) == mode)
9729 reg_weight--;
9732 return reg_weight;
9735 /* Calculate regmode weights for all insns of a basic block. */
9736 static void
9737 find_regmode_weight (basic_block b, machine_mode mode)
9739 rtx_insn *insn, *next_tail, *head, *tail;
9741 get_ebb_head_tail (b, b, &head, &tail);
9742 next_tail = NEXT_INSN (tail);
9744 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9746 /* Handle register life information. */
9747 if (!INSN_P (insn))
9748 continue;
9750 if (mode == SFmode)
9751 INSN_REGMODE_WEIGHT (insn, mode) =
9752 find_insn_regmode_weight (insn, mode)
9753 + 2 * find_insn_regmode_weight (insn, DFmode);
9754 else if (mode == SImode)
9755 INSN_REGMODE_WEIGHT (insn, mode) =
9756 find_insn_regmode_weight (insn, mode)
9757 + 2 * find_insn_regmode_weight (insn, DImode);
9761 /* Comparison function for ready queue sorting. */
9762 static int
9763 rank_for_reorder (const void *x, const void *y)
9765 rtx_insn *tmp = *(rtx_insn * const *) y;
9766 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9768 /* The insn in a schedule group should be issued the first. */
9769 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9770 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9772 /* If insns are equally good, sort by INSN_LUID (original insn order), This
9773 minimizes instruction movement, thus minimizing sched's effect on
9774 register pressure. */
9775 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9778 /* Resort the array A in which only element at index N may be out of order. */
9779 static void
9780 swap_reorder (rtx_insn **a, int n)
9782 rtx_insn *insn = a[n - 1];
9783 int i = n - 2;
9785 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9787 a[i + 1] = a[i];
9788 i -= 1;
9790 a[i + 1] = insn;
9793 /* Sort the ready list by ascending priority. */
9794 static void
9795 ready_reorder (rtx_insn **ready, int nready)
9797 if (nready == 2)
9798 swap_reorder (ready, nready);
9799 else if (nready > 2)
9800 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9803 /* Count life regions of r0 for a block. */
9804 static int
9805 find_r0_life_regions (basic_block b)
9807 bool live;
9808 int set;
9809 int death = 0;
9811 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9813 set = 1;
9814 live = true;
9816 else
9818 set = 0;
9819 live = false;
9822 rtx_insn* insn = BB_HEAD (b);
9823 rtx_insn* end = BB_END (b);
9824 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9825 while (1)
9827 if (INSN_P (insn))
9829 if (find_regno_note (insn, REG_DEAD, R0_REG))
9831 death++;
9832 live = false;
9835 rtx pset;
9836 if (!live
9837 && (pset = single_set (insn))
9838 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9839 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9841 set++;
9842 live = true;
9845 if (insn == end)
9846 break;
9847 insn = NEXT_INSN (insn);
9849 return set - death;
9852 /* Calculate regmode weights for all insns of all basic block. */
9853 static void
9854 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9855 int verbose ATTRIBUTE_UNUSED,
9856 int old_max_uid)
9858 basic_block b;
9860 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9861 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9862 r0_life_regions = 0;
9864 FOR_EACH_BB_REVERSE_FN (b, cfun)
9866 find_regmode_weight (b, SImode);
9867 find_regmode_weight (b, SFmode);
9868 if (!reload_completed)
9869 r0_life_regions += find_r0_life_regions (b);
9872 CURR_REGMODE_PRESSURE (SImode) = 0;
9873 CURR_REGMODE_PRESSURE (SFmode) = 0;
9876 /* Cleanup. */
9877 static void
9878 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9879 int verbose ATTRIBUTE_UNUSED)
9881 if (regmode_weight[0])
9883 free (regmode_weight[0]);
9884 regmode_weight[0] = NULL;
9886 if (regmode_weight[1])
9888 free (regmode_weight[1]);
9889 regmode_weight[1] = NULL;
9893 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9894 keep count of register pressures on SImode and SFmode. */
9895 static int
9896 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9897 int sched_verbose ATTRIBUTE_UNUSED,
9898 rtx_insn *insn,
9899 int can_issue_more)
9901 if (GET_CODE (PATTERN (insn)) != USE
9902 && GET_CODE (PATTERN (insn)) != CLOBBER)
9903 cached_can_issue_more = can_issue_more - 1;
9904 else
9905 cached_can_issue_more = can_issue_more;
9907 if (reload_completed)
9908 return cached_can_issue_more;
9910 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9911 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9913 return cached_can_issue_more;
9916 static void
9917 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9918 int verbose ATTRIBUTE_UNUSED,
9919 int veclen ATTRIBUTE_UNUSED)
9921 CURR_REGMODE_PRESSURE (SImode) = 0;
9922 CURR_REGMODE_PRESSURE (SFmode) = 0;
9925 /* Some magic numbers. */
9926 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9927 functions that already have high pressure on r0. */
9928 #define R0_MAX_LIFE_REGIONS 2
9929 /* Register Pressure thresholds for SImode and SFmode registers. */
9930 #define SIMODE_MAX_WEIGHT 5
9931 #define SFMODE_MAX_WEIGHT 10
9933 /* Return true if the pressure is high for MODE. */
9934 static bool
9935 high_pressure (machine_mode mode)
9937 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
9938 functions that already have high pressure on r0. */
9939 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9940 return true;
9942 if (mode == SFmode)
9943 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9944 else
9945 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9948 /* Reorder ready queue if register pressure is high. */
9949 static int
9950 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9951 int sched_verbose ATTRIBUTE_UNUSED,
9952 rtx_insn **ready,
9953 int *n_readyp,
9954 int clock_var ATTRIBUTE_UNUSED)
9956 if (reload_completed)
9957 return sh_issue_rate ();
9959 if (high_pressure (SFmode) || high_pressure (SImode))
9961 ready_reorder (ready, *n_readyp);
9964 return sh_issue_rate ();
9967 /* Skip cycles if the current register pressure is high. */
9968 static int
9969 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9970 int sched_verbose ATTRIBUTE_UNUSED,
9971 rtx_insn **ready ATTRIBUTE_UNUSED,
9972 int *n_readyp ATTRIBUTE_UNUSED,
9973 int clock_var ATTRIBUTE_UNUSED)
9975 if (reload_completed)
9976 return cached_can_issue_more;
9978 if (high_pressure(SFmode) || high_pressure (SImode))
9979 skip_cycles = 1;
9981 return cached_can_issue_more;
9984 /* Skip cycles without sorting the ready queue. This will move insn from
9985 Q->R. If this is the last cycle we are skipping; allow sorting of ready
9986 queue by sh_reorder. */
9988 /* Generally, skipping these many cycles are sufficient for all insns to move
9989 from Q -> R. */
9990 #define MAX_SKIPS 8
9992 static int
9993 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9994 int sched_verbose ATTRIBUTE_UNUSED,
9995 rtx_insn *insn ATTRIBUTE_UNUSED,
9996 int last_clock_var,
9997 int clock_var,
9998 int *sort_p)
10000 if (reload_completed)
10001 return 0;
10003 if (skip_cycles)
10005 if ((clock_var - last_clock_var) < MAX_SKIPS)
10007 *sort_p = 0;
10008 return 1;
10010 /* If this is the last cycle we are skipping, allow reordering of R. */
10011 if ((clock_var - last_clock_var) == MAX_SKIPS)
10013 *sort_p = 1;
10014 return 1;
10018 skip_cycles = 0;
10020 return 0;
10023 static bool
10024 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10026 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10030 On the SH1..SH4, the trampoline looks like
10031 2 0002 D202 mov.l l2,r2
10032 1 0000 D301 mov.l l1,r3
10033 3 0004 422B jmp @r2
10034 4 0006 0009 nop
10035 5 0008 00000000 l1: .long area
10036 6 000c 00000000 l2: .long function
10038 FDPIC needs a form that includes a function descriptor and
10039 code to load the GOT register:
10040 0 0000 00000000 .long l0
10041 1 0004 00000000 .long gotval
10042 2 0008 D302 l0: mov.l l1,r3
10043 3 000a D203 mov.l l2,r2
10044 4 000c 6122 mov.l @r2,r1
10045 5 000e 5C21 mov.l @(4,r2),r12
10046 6 0010 412B jmp @r1
10047 7 0012 0009 nop
10048 8 0014 00000000 l1: .long area
10049 9 0018 00000000 l2: .long function
10051 SH5 (compact) uses r1 instead of r3 for the static chain. */
10053 /* Emit insns to store a value at memory address + offset. */
10054 static void
10055 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10057 gcc_assert ((offset & 3) == 0);
10058 emit_move_insn (offset == 0
10059 ? change_address (addr, SImode, NULL_RTX)
10060 : adjust_address (addr, SImode, offset), value);
10063 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10064 static void
10065 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10067 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10068 ? (w0 | (w1 << 16))
10069 : (w1 | (w0 << 16)), SImode));
10072 /* Emit RTL insns to initialize the variable parts of a trampoline.
10073 FNADDR is an RTX for the address of the function's pure code.
10074 CXT is an RTX for the static chain value for the function. */
10075 static void
10076 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10078 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10079 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10081 if (TARGET_FDPIC)
10083 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10085 sh_emit_storesi (tramp_mem, 0, a);
10086 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10088 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10089 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10090 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10092 sh_emit_storesi (tramp_mem, 20, cxt);
10093 sh_emit_storesi (tramp_mem, 24, fnaddr);
10095 else
10097 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10098 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10100 sh_emit_storesi (tramp_mem, 8, cxt);
10101 sh_emit_storesi (tramp_mem, 12, fnaddr);
10103 if (TARGET_HARD_SH4)
10105 if (!TARGET_INLINE_IC_INVALIDATE
10106 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10107 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10108 FUNCTION_ORDINARY).sym,
10109 LCT_NORMAL, VOIDmode, tramp, SImode);
10110 else
10111 emit_insn (gen_ic_invalidate_line (tramp));
10115 /* On SH5, trampolines are SHmedia code, so add 1 to the address. */
10116 static rtx
10117 sh_trampoline_adjust_address (rtx tramp)
10119 return tramp;
10122 /* If PIC, we cannot make sibling calls to global functions
10123 because the PLT requires r12 to be live. */
10124 static bool
10125 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10127 return (1
10128 && ! sh_cfun_interrupt_handler_p ()
10129 && (! flag_pic || TARGET_FDPIC
10130 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10131 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10134 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10135 void
10136 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10138 const_tree decl = SYMBOL_REF_DECL (sym);
10139 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10141 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10142 emit_insn (gen_sym_label2reg (reg, sym, lab));
10143 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10144 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10145 else
10146 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10149 /* Machine specific built-in functions. */
10151 struct builtin_description
10153 bool (* const is_enabled) (void);
10154 const enum insn_code icode;
10155 const char *const name;
10156 int signature;
10157 tree fndecl;
10160 /* This function can be used if there are any built-ins that are not for
10161 SHmedia. It's commented out to avoid the defined-but-unused warning. */
10162 static bool
10163 sh1_builtin_p (void)
10165 return TARGET_SH1;
10168 /* describe number and signedness of arguments; arg[0] == result
10169 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
10170 /* 9: 64-bit pointer, 10: 32-bit pointer */
10171 static const char signature_args[][4] =
10173 #define SH_BLTIN_V2SI2 0
10174 { 4, 4 },
10175 #define SH_BLTIN_V4HI2 1
10176 { 4, 4 },
10177 #define SH_BLTIN_V2SI3 2
10178 { 4, 4, 4 },
10179 #define SH_BLTIN_V4HI3 3
10180 { 4, 4, 4 },
10181 #define SH_BLTIN_V8QI3 4
10182 { 4, 4, 4 },
10183 #define SH_BLTIN_MAC_HISI 5
10184 { 1, 4, 4, 1 },
10185 #define SH_BLTIN_SH_HI 6
10186 { 4, 4, 1 },
10187 #define SH_BLTIN_SH_SI 7
10188 { 4, 4, 1 },
10189 #define SH_BLTIN_V4HI2V2SI 8
10190 { 4, 4, 4 },
10191 #define SH_BLTIN_V4HI2V8QI 9
10192 { 4, 4, 4 },
10193 #define SH_BLTIN_SISF 10
10194 { 4, 2 },
10195 #define SH_BLTIN_LDUA_L 11
10196 { 2, 10 },
10197 #define SH_BLTIN_LDUA_Q 12
10198 { 1, 10 },
10199 #define SH_BLTIN_STUA_L 13
10200 { 0, 10, 2 },
10201 #define SH_BLTIN_STUA_Q 14
10202 { 0, 10, 1 },
10203 #define SH_BLTIN_LDUA_L64 15
10204 { 2, 9 },
10205 #define SH_BLTIN_LDUA_Q64 16
10206 { 1, 9 },
10207 #define SH_BLTIN_STUA_L64 17
10208 { 0, 9, 2 },
10209 #define SH_BLTIN_STUA_Q64 18
10210 { 0, 9, 1 },
10211 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10212 #define SH_BLTIN_2 19
10213 #define SH_BLTIN_SU 19
10214 { 1, 2 },
10215 #define SH_BLTIN_3 20
10216 #define SH_BLTIN_SUS 20
10217 { 2, 2, 1 },
10218 #define SH_BLTIN_PSSV 21
10219 { 0, 8, 2, 2 },
10220 #define SH_BLTIN_XXUU 22
10221 #define SH_BLTIN_UUUU 22
10222 { 1, 1, 1, 1 },
10223 #define SH_BLTIN_PV 23
10224 { 0, 8 },
10225 #define SH_BLTIN_VP 24
10226 { 8, 0 },
10227 #define SH_BLTIN_UV 25
10228 { 1, 0 },
10229 #define SH_BLTIN_VU 26
10230 { 0, 1 },
10232 /* mcmv: operands considered unsigned. */
10233 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10234 /* mperm: control value considered unsigned int. */
10235 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10236 /* mshards_q: returns signed short. */
10237 /* nsb: takes long long arg, returns unsigned char. */
10238 static struct builtin_description bdesc[] =
10240 { sh1_builtin_p,
10241 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10242 { sh1_builtin_p,
10243 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
10246 static tree sh_builtin_get_fpscr;
10247 static tree sh_builtin_set_fpscr;
10249 static void
10250 sh_init_builtins (void)
10252 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10253 memset (shared, 0, sizeof shared);
10255 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10257 builtin_description* d = &bdesc[di];
10259 if (!d->is_enabled ())
10260 continue;
10262 tree type, arg_type = NULL_TREE;
10263 int signature = d->signature;
10265 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10266 type = shared[signature];
10267 else
10269 int has_result = signature_args[signature][0] != 0;
10270 tree args[3];
10272 if (! TARGET_FPU_ANY
10273 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10274 continue;
10275 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10276 args[i] = NULL_TREE;
10277 for (int i = 3; ; i--)
10279 int arg = signature_args[signature][i];
10280 int opno = i - 1 + has_result;
10282 if (arg & 8)
10283 arg_type = ptr_type_node;
10284 else if (arg)
10285 arg_type = (*lang_hooks.types.type_for_mode)
10286 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10287 else if (i)
10288 continue;
10289 else
10290 arg_type = void_type_node;
10291 if (i == 0)
10292 break;
10293 args[i-1] = arg_type;
10295 type = build_function_type_list (arg_type, args[0], args[1],
10296 args[2], NULL_TREE);
10297 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10298 shared[signature] = type;
10300 d->fndecl =
10301 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10302 NULL, NULL_TREE);
10303 /* Recode {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10304 if (d->icode == CODE_FOR_sts_fpscr)
10305 sh_builtin_get_fpscr = d->fndecl;
10306 else if (d->icode == CODE_FOR_set_fpscr)
10307 sh_builtin_set_fpscr = d->fndecl;
10311 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
10313 static void
10314 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
10316 const unsigned SH_FE_INVALID = 64;
10317 const unsigned SH_FE_DIVBYZERO = 32;
10318 const unsigned SH_FE_OVERFLOW = 16;
10319 const unsigned SH_FE_UNDERFLOW = 8;
10320 const unsigned SH_FE_INEXACT = 4;
10321 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10322 | SH_FE_DIVBYZERO
10323 | SH_FE_OVERFLOW
10324 | SH_FE_UNDERFLOW
10325 | SH_FE_INEXACT);
10326 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
10327 tree fenv_var, mask, ld_fenv, masked_fenv;
10328 tree new_fenv_var, reload_fenv, restore_fnenv;
10329 tree update_call, atomic_feraiseexcept, hold_fnclex;
10331 if (! TARGET_FPU_ANY)
10332 return;
10334 /* Generate the equivalent of :
10335 unsigned int fenv_var;
10336 fenv_var = __builtin_sh_get_fpscr ();
10338 unsigned int masked_fenv;
10339 masked_fenv = fenv_var & mask;
10341 __builtin_sh_set_fpscr (masked_fenv); */
10343 fenv_var = create_tmp_var_raw (unsigned_type_node);
10344 mask = build_int_cst (unsigned_type_node,
10345 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10346 | SH_FE_ALL_EXCEPT));
10347 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10348 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10349 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10350 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10351 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10352 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10353 ld_fenv),
10354 NULL_TREE, NULL_TREE);
10355 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10357 /* Store the value of masked_fenv to clear the exceptions:
10358 __builtin_sh_set_fpscr (masked_fenv); */
10360 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10362 /* Generate the equivalent of :
10363 unsigned int new_fenv_var;
10364 new_fenv_var = __builtin_sh_get_fpscr ();
10366 __builtin_sh_set_fpscr (fenv_var);
10368 __atomic_feraiseexcept (new_fenv_var); */
10370 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10371 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10372 build_call_expr (sh_builtin_get_fpscr, 0));
10373 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10374 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10375 update_call = build_call_expr (atomic_feraiseexcept, 1,
10376 fold_convert (integer_type_node,
10377 new_fenv_var));
10378 *update = build2 (COMPOUND_EXPR, void_type_node,
10379 build2 (COMPOUND_EXPR, void_type_node,
10380 reload_fenv, restore_fnenv), update_call);
10383 /* Implements target hook vector_mode_supported_p. */
10384 bool
10385 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10387 return false;
10390 bool
10391 sh_frame_pointer_required (void)
10393 /* If needed override this in other tm.h files to cope with various OS
10394 lossage requiring a frame pointer. */
10395 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10396 return true;
10398 if (crtl->profile)
10399 return true;
10401 return false;
10404 /* Implements target hook dwarf_calling_convention. Return an enum
10405 of dwarf_calling_convention. */
10407 sh_dwarf_calling_convention (const_tree func)
10409 if (sh_attr_renesas_p (func))
10410 return DW_CC_GNU_renesas_sh;
10412 return DW_CC_normal;
10415 /* Returns the sh builtin decl for CODE. */
10416 static tree
10417 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10419 if (code >= ARRAY_SIZE (bdesc))
10420 return error_mark_node;
10422 if (!bdesc[code].is_enabled ())
10423 return error_mark_node;
10425 return bdesc[code].fndecl;
10428 /* Expand an expression EXP that calls a built-in function,
10429 with result going to TARGET if that's convenient
10430 (and in mode MODE if that's convenient).
10431 SUBTARGET may be used as the target for computing one of EXP's operands.
10432 IGNORE is nonzero if the value is to be ignored. */
10433 static rtx
10434 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10435 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10437 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10438 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10439 const struct builtin_description *d = &bdesc[fcode];
10440 enum insn_code icode = d->icode;
10441 int signature = d->signature;
10442 int nop = 0;
10443 rtx op[4];
10445 if (signature_args[signature][0])
10447 if (ignore)
10448 return NULL_RTX;
10450 machine_mode tmode = insn_data[icode].operand[0].mode;
10451 if (! target || GET_MODE (target) != tmode
10452 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10453 target = gen_reg_rtx (tmode);
10454 op[nop++] = target;
10456 else
10457 target = NULL_RTX;
10459 for (int i = 1; i <= 3; i++, nop++)
10461 if (! signature_args[signature][i])
10462 break;
10463 tree arg = CALL_EXPR_ARG (exp, i - 1);
10464 if (arg == error_mark_node)
10465 return const0_rtx;
10467 machine_mode opmode;
10468 tree optype;
10469 if (signature_args[signature][i] & 8)
10471 opmode = ptr_mode;
10472 optype = ptr_type_node;
10474 else
10476 opmode = insn_data[icode].operand[nop].mode;
10477 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10480 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10481 if (argmode != opmode)
10482 arg = build1 (NOP_EXPR, optype, arg);
10483 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10484 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10485 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10488 rtx pat = NULL_RTX;
10490 switch (nop)
10492 case 1:
10493 pat = (*insn_data[d->icode].genfun) (op[0]);
10494 break;
10495 case 2:
10496 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10497 break;
10498 case 3:
10499 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10500 break;
10501 case 4:
10502 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10503 break;
10504 default:
10505 gcc_unreachable ();
10507 if (! pat)
10508 return NULL_RTX;
10509 emit_insn (pat);
10510 return target;
10513 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10514 UNITS_PER_WORD bits wide. */
10516 static unsigned int
10517 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10519 if (XD_REGISTER_P (regno))
10520 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10521 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
10524 /* Implement TARGET_HARD_REGNO_MODE_OK.
10526 We can allow any mode in any general register. The special registers
10527 only allow SImode. Don't allow any mode in the PR.
10529 We cannot hold DCmode values in the XD registers because alter_reg
10530 handles subregs of them incorrectly. We could work around this by
10531 spacing the XD registers like the DR registers, but this would require
10532 additional memory in every compilation to hold larger register vectors.
10533 We could hold SFmode / SCmode values in XD registers, but that
10534 would require a tertiary reload when reloading from / to memory,
10535 and a secondary reload to reload from / to general regs; that
10536 seems to be a losing proposition.
10538 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10539 it won't be ferried through GP registers first. */
10540 static bool
10541 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10543 if (SPECIAL_REGISTER_P (regno))
10544 return mode == SImode;
10546 if (regno == FPUL_REG)
10547 return (mode == SImode || mode == SFmode);
10549 if (FP_REGISTER_P (regno) && mode == SFmode)
10550 return true;
10552 if (mode == V2SFmode)
10554 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10555 || GENERAL_REGISTER_P (regno)))
10556 return true;
10557 else
10558 return false;
10561 if (mode == V4SFmode)
10563 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10564 || GENERAL_REGISTER_P (regno))
10565 return true;
10566 else
10567 return false;
10570 if (mode == V16SFmode)
10571 return regno == FIRST_XD_REG;
10573 if (FP_REGISTER_P (regno))
10575 if (mode == SFmode
10576 || mode == SImode
10577 || ((TARGET_SH2E) && mode == SCmode)
10578 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10579 && ((regno - FIRST_FP_REG) & 1) == 0)
10580 || (TARGET_SH4 && mode == TImode
10581 && ((regno - FIRST_FP_REG) & 3) == 0))
10582 return true;
10583 else
10584 return false;
10587 if (XD_REGISTER_P (regno))
10588 return mode == DFmode;
10590 if (regno == PR_REG)
10591 return mode == SImode;
10593 if (regno == FPSCR_REG)
10594 return mode == SImode;
10596 return true;
10599 /* Implement TARGET_MODES_TIEABLE_P.
10601 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10602 and MODE2, for any hard reg, then this must be false for correct output.
10603 That's the case for xd registers: we don't hold SFmode values in
10604 them, so we can't tie an SFmode pseudos with one in another
10605 floating-point mode. */
10607 static bool
10608 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10610 return (mode1 == mode2
10611 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10612 && (mode1 != SFmode && mode2 != SFmode)));
10615 /* Specify the modes required to caller save a given hard regno.
10616 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10617 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10618 permits integer modes on them. That makes LRA's split process
10619 unhappy. See PR55212.
10621 machine_mode
10622 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10623 machine_mode mode)
10625 if (FP_REGISTER_P (regno)
10626 && (mode == SFmode
10627 || mode == SCmode
10628 || ((mode == DFmode || mode == DCmode)
10629 && ((regno - FIRST_FP_REG) & 1) == 0)))
10630 return mode;
10632 return choose_hard_reg_mode (regno, nregs, false);
10635 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10636 static bool
10637 sh_can_change_mode_class (machine_mode from, machine_mode to,
10638 reg_class_t rclass)
10640 /* We want to enable the use of SUBREGs as a means to
10641 VEC_SELECT a single element of a vector. */
10643 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10644 This can be problematic when SFmode vector subregs need to be accessed
10645 on the stack with displacement addressing, as it happens with -O0.
10646 Thus we disallow the mode change for -O0. */
10647 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10648 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10650 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10652 if (TARGET_LITTLE_ENDIAN)
10654 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10655 return !reg_classes_intersect_p (DF_REGS, rclass);
10657 else
10659 if (GET_MODE_SIZE (from) < 8)
10660 return !reg_classes_intersect_p (DF_REGS, rclass);
10663 return true;
10666 /* Return true if registers in machine mode MODE will likely be
10667 allocated to registers in small register classes. */
10668 bool
10669 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10671 return true;
10674 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10675 that label is used. */
10676 void
10677 sh_mark_label (rtx address, int nuses)
10679 if (GOTOFF_P (address))
10681 /* Extract the label or symbol. */
10682 address = XEXP (address, 0);
10683 if (GET_CODE (address) == PLUS)
10684 address = XEXP (address, 0);
10685 address = XVECEXP (address, 0, 0);
10687 if (GET_CODE (address) == LABEL_REF
10688 && LABEL_P (XEXP (address, 0)))
10689 LABEL_NUSES (XEXP (address, 0)) += nuses;
10692 /* Compute extra cost of moving data between one register class
10693 and another.
10695 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10696 uses this information. Hence, the general register <-> floating point
10697 register information here is not used for SFmode. */
10698 static int
10699 sh_register_move_cost (machine_mode mode,
10700 reg_class_t srcclass, reg_class_t dstclass)
10702 if (dstclass == T_REGS || dstclass == PR_REGS)
10703 return 10;
10705 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10706 return 4;
10708 if (mode == SImode && TARGET_FMOVD
10709 && REGCLASS_HAS_FP_REG (srcclass)
10710 && REGCLASS_HAS_FP_REG (dstclass))
10711 return 4;
10713 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10714 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10716 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10717 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10718 return 9;
10720 if ((REGCLASS_HAS_FP_REG (dstclass)
10721 && REGCLASS_HAS_GENERAL_REG (srcclass))
10722 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10723 && REGCLASS_HAS_FP_REG (srcclass)))
10725 /* Discourage trying to use fp regs for a pointer. This also
10726 discourages fp regs with SImode because Pmode is an alias
10727 of SImode on this target. See PR target/48596. */
10728 int addend = (mode == Pmode) ? 40 : 0;
10730 return ((TARGET_FMOVD ? 8 : 12) + addend)
10731 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10734 if ((dstclass == FPUL_REGS
10735 && REGCLASS_HAS_GENERAL_REG (srcclass))
10736 || (srcclass == FPUL_REGS
10737 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10738 return 5;
10740 if ((dstclass == FPUL_REGS
10741 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10742 || (srcclass == FPUL_REGS
10743 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10744 return 7;
10746 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10747 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10748 return 4;
10750 if (TARGET_FMOVD
10751 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10752 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10753 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10755 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10758 static rtx
10759 emit_load_ptr (rtx reg, rtx addr)
10761 rtx mem = gen_const_mem (ptr_mode, addr);
10763 if (Pmode != ptr_mode)
10764 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10765 return emit_move_insn (reg, mem);
10768 static void
10769 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10770 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10771 tree function)
10773 CUMULATIVE_ARGS cum;
10774 int structure_value_byref = 0;
10775 rtx this_rtx, this_value, sibcall, funexp;
10776 rtx_insn *insns;
10777 tree funtype = TREE_TYPE (function);
10778 int simple_add = CONST_OK_FOR_ADD (delta);
10779 int did_load = 0;
10780 rtx scratch0, scratch1, scratch2;
10782 reload_completed = 1;
10783 epilogue_completed = 1;
10784 crtl->uses_only_leaf_regs = 1;
10786 emit_note (NOTE_INSN_PROLOGUE_END);
10788 /* Find the "this" pointer. We have such a wide range of ABIs for the
10789 SH that it's best to do this completely machine independently.
10790 "this" is passed as first argument, unless a structure return pointer
10791 comes first, in which case "this" comes second. */
10792 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10793 #ifndef PCC_STATIC_STRUCT_RETURN
10794 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10795 structure_value_byref = 1;
10796 #endif /* not PCC_STATIC_STRUCT_RETURN */
10797 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10799 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10801 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
10803 this_rtx
10804 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
10806 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10807 static chain pointer (even if you can't have nested virtual functions
10808 right now, someone might implement them sometime), and the rest of the
10809 registers are used for argument passing, are callee-saved, or reserved. */
10810 /* We need to check call_used_regs / fixed_regs in case -fcall_saved-reg /
10811 -ffixed-reg has been used. */
10812 if (! call_used_regs[0] || fixed_regs[0])
10813 error ("r0 needs to be available as a call-clobbered register");
10814 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10817 if (call_used_regs[1] && ! fixed_regs[1])
10818 scratch1 = gen_rtx_REG (ptr_mode, 1);
10819 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10820 pointing where to return struct values. */
10821 if (call_used_regs[3] && ! fixed_regs[3])
10822 scratch2 = gen_rtx_REG (Pmode, 3);
10825 this_value = plus_constant (Pmode, this_rtx, delta);
10826 if (vcall_offset
10827 && (simple_add || scratch0 != scratch1)
10828 && strict_memory_address_p (ptr_mode, this_value))
10830 emit_load_ptr (scratch0, this_value);
10831 did_load = 1;
10834 if (!delta)
10835 ; /* Do nothing. */
10836 else if (simple_add)
10837 emit_move_insn (this_rtx, this_value);
10838 else
10840 emit_move_insn (scratch1, GEN_INT (delta));
10841 emit_insn (gen_add2_insn (this_rtx, scratch1));
10844 if (vcall_offset)
10846 rtx offset_addr;
10848 if (!did_load)
10849 emit_load_ptr (scratch0, this_rtx);
10851 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10852 if (strict_memory_address_p (ptr_mode, offset_addr))
10853 ; /* Do nothing. */
10854 else if (scratch0 != scratch1)
10856 /* scratch0 != scratch1, and we have indexed loads. Get better
10857 schedule by loading the offset into r1 and using an indexed
10858 load - then the load of r1 can issue before the load from
10859 (this_rtx + delta) finishes. */
10860 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10861 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10863 else if (CONST_OK_FOR_ADD (vcall_offset))
10865 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10866 offset_addr = scratch0;
10868 else if (scratch0 != scratch1)
10870 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10871 emit_insn (gen_add2_insn (scratch0, scratch1));
10872 offset_addr = scratch0;
10874 else
10875 gcc_unreachable (); /* FIXME */
10876 emit_load_ptr (scratch0, offset_addr);
10878 if (Pmode != ptr_mode)
10879 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10880 emit_insn (gen_add2_insn (this_rtx, scratch0));
10883 /* Generate a tail call to the target function. */
10884 if (! TREE_USED (function))
10886 assemble_external (function);
10887 TREE_USED (function) = 1;
10889 funexp = XEXP (DECL_RTL (function), 0);
10890 /* If the function is overridden, so is the thunk, hence we don't
10891 need GOT addressing even if this is a public symbol. */
10892 #if 0
10893 if (TARGET_SH1 && ! flag_weak)
10894 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10895 else
10896 #endif
10897 if (TARGET_SH2 && flag_pic)
10899 if (TARGET_FDPIC)
10901 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10902 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10904 else
10906 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10907 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10910 else
10912 emit_move_insn (scratch2, funexp);
10913 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10914 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10916 sibcall = emit_call_insn (sibcall);
10917 SIBLING_CALL_P (sibcall) = 1;
10918 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10919 emit_barrier ();
10921 /* Run just enough of rest_of_compilation to do scheduling and get
10922 the insns emitted. Note that use_thunk calls
10923 assemble_start_function and assemble_end_function. */
10925 insns = get_insns ();
10927 if (optimize > 0)
10929 if (! cfun->cfg)
10930 init_flow (cfun);
10931 split_all_insns_noflow ();
10934 sh_reorg ();
10935 shorten_branches (insns);
10936 final_start_function (insns, file, 1);
10937 final (insns, file, 1);
10938 final_end_function ();
10940 reload_completed = 0;
10941 epilogue_completed = 0;
10944 /* Return an RTX pair for the address and call site label of a function
10945 NAME of kind KIND, placing the result in TARGET if not NULL. For
10946 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10947 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10948 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10949 address of the function itself, not a function descriptor, so they
10950 can only be used with functions not using the FDPIC register that
10951 are known to be called directory without a PLT entry. */
10953 function_symbol_result
10954 function_symbol (rtx target, const char *name, sh_function_kind kind)
10956 /* If this is not an ordinary function, the name usually comes from a
10957 string literal or an sprintf buffer. Make sure we use the same
10958 string consistently, so that cse will be able to unify address loads. */
10959 if (kind != FUNCTION_ORDINARY)
10960 name = IDENTIFIER_POINTER (get_identifier (name));
10961 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
10962 rtx lab = const0_rtx;
10963 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10964 if (flag_pic)
10965 switch (kind)
10967 case FUNCTION_ORDINARY:
10968 break;
10969 case SFUNC_GOT:
10971 rtx reg = target ? target : gen_reg_rtx (Pmode);
10973 emit_insn (gen_symGOT2reg (reg, sym));
10974 sym = reg;
10975 break;
10977 case SFUNC_STATIC:
10979 rtx reg = target ? target : gen_reg_rtx (Pmode);
10981 if (TARGET_FDPIC)
10983 /* We use PC-relative calls, since GOTOFF can only refer
10984 to writable data. This works along with sh_sfunc_call. */
10985 lab = PATTERN (gen_call_site ());
10986 emit_insn (gen_sym_label2reg (reg, sym, lab));
10988 else
10990 /* ??? To allow cse to work, we use GOTOFF relocations.
10991 we could add combiner patterns to transform this into
10992 straight pc-relative calls with sym2PIC / bsrf when
10993 label load and function call are still 1:1 and in the
10994 same basic block during combine. */
10995 emit_insn (gen_symGOTOFF2reg (reg, sym));
10998 sym = reg;
10999 break;
11002 if (target && sym != target)
11004 emit_move_insn (target, sym);
11005 return function_symbol_result (target, lab);
11007 return function_symbol_result (sym, lab);
11010 /* Find the number of the first general purpose register in S that
11011 is not set. */
11012 static int
11013 scavenge_reg (HARD_REG_SET *s)
11015 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11016 if (TEST_HARD_REG_BIT (*s, r))
11017 return r;
11018 return -1;
11022 sh_get_pr_initial_val (void)
11024 /* If we haven't finished rtl generation, there might be a nonlocal label
11025 that we haven't seen yet.
11026 ??? get_hard_reg_initial_val fails if it is called after register
11027 allocation has started, unless it has been called before for the
11028 same register. And even then, we end in trouble if we didn't use
11029 the register in the same basic block before. So call
11030 get_hard_reg_initial_val now and wrap it in an unspec if we might
11031 need to replace it. */
11032 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11033 combine can put the pseudo returned by get_hard_reg_initial_val into
11034 instructions that need a general purpose registers, which will fail to
11035 be recognized when the pseudo becomes allocated to PR. */
11036 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
11037 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11040 bool
11041 sh_expand_t_scc (rtx operands[])
11043 enum rtx_code code = GET_CODE (operands[1]);
11044 rtx target = operands[0];
11045 rtx op0 = operands[2];
11046 rtx op1 = operands[3];
11047 rtx result = target;
11049 if (!REG_P (op0) || REGNO (op0) != T_REG
11050 || !CONST_INT_P (op1))
11051 return false;
11052 if (!REG_P (result))
11053 result = gen_reg_rtx (SImode);
11054 HOST_WIDE_INT val = INTVAL (op1);
11055 if ((code == EQ && val == 1) || (code == NE && val == 0))
11056 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11057 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11058 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11059 else if (code == EQ || code == NE)
11060 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11061 else
11062 return false;
11063 if (result != target)
11064 emit_move_insn (target, result);
11065 return true;
11068 /* INSN is an sfunc; return the rtx that describes the address used. */
11069 static rtx
11070 extract_sfunc_addr (rtx insn)
11072 rtx pattern = PATTERN (insn);
11073 const int len = XVECLEN (pattern, 0);
11074 for (int i = 0; i < len; i++)
11076 rtx part = XVECEXP (pattern, 0, i);
11077 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11078 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11079 return XEXP (part, 0);
11081 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11082 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11085 /* Verify that the register in use_sfunc_addr still agrees with the address
11086 used in the sfunc. This prevents fill_slots_from_thread from changing
11087 use_sfunc_addr.
11088 INSN is the use_sfunc_addr instruction, and REG is the register it
11089 guards. */
11090 bool
11091 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11093 /* Search for the sfunc. It should really come right after INSN. */
11094 while ((insn = NEXT_INSN (insn)))
11096 if (LABEL_P (insn) || JUMP_P (insn))
11097 break;
11098 if (! INSN_P (insn))
11099 continue;
11101 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11102 insn = seq->insn (0);
11103 if (GET_CODE (PATTERN (insn)) != PARALLEL
11104 || get_attr_type (insn) != TYPE_SFUNC)
11105 continue;
11106 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11108 gcc_unreachable ();
11111 /* This function returns a constant rtx that represents 2**15 / pi in
11112 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11113 of a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi. */
11114 static GTY(()) rtx sh_fsca_sf2int_rtx;
11117 sh_fsca_sf2int (void)
11119 if (! sh_fsca_sf2int_rtx)
11121 REAL_VALUE_TYPE rv;
11123 real_from_string (&rv, "10430.378350470453");
11124 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11127 return sh_fsca_sf2int_rtx;
11130 /* This function returns a constant rtx that represents pi / 2**15 in
11131 SFmode. It's used to scale SFmode angles, in radians, to a
11132 fixed-point signed 16.16-bit fraction of a full circle, i.e. 2*pi
11133 maps to 0x10000. */
11134 static GTY(()) rtx sh_fsca_int2sf_rtx;
11137 sh_fsca_int2sf (void)
11139 if (! sh_fsca_int2sf_rtx)
11141 REAL_VALUE_TYPE rv;
11143 real_from_string (&rv, "9.587379924285257e-5");
11144 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11147 return sh_fsca_int2sf_rtx;
11150 /* Initialize the CUMULATIVE_ARGS structure. */
11151 void
11152 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11153 tree fntype,
11154 rtx libname ATTRIBUTE_UNUSED,
11155 tree fndecl,
11156 signed int n_named_args,
11157 machine_mode mode)
11159 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11160 pcum->free_single_fp_reg = 0;
11161 pcum->outgoing = n_named_args != -1;
11163 /* FIXME: Should we check TARGET_HITACHI here ??? */
11164 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11166 if (fntype)
11168 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11169 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11170 pcum->prototype_p = prototype_p (fntype);
11171 pcum->arg_count [(int) SH_ARG_INT] = false;
11173 else
11175 pcum->arg_count [(int) SH_ARG_INT] = 0;
11176 pcum->prototype_p = false;
11177 if (mode != VOIDmode)
11179 /* If the default ABI is the Renesas ABI then all library
11180 calls must assume that the library will be using the
11181 Renesas ABI. So if the function would return its result
11182 in memory then we must force the address of this memory
11183 block onto the stack. Ideally we would like to call
11184 targetm.calls.return_in_memory() here but we do not have
11185 the TYPE or the FNDECL available so we synthesize the
11186 contents of that function as best we can. */
11187 pcum->force_mem =
11188 (TARGET_DEFAULT & MASK_HITACHI)
11189 && (mode == BLKmode
11190 || (GET_MODE_SIZE (mode) > 4
11191 && !(mode == DFmode
11192 && TARGET_FPU_DOUBLE)));
11194 else
11195 pcum->force_mem = false;
11200 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11202 enum rtx_code code = TRUNCATE;
11204 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11206 rtx inner = XEXP (x, 0);
11207 machine_mode inner_mode = GET_MODE (inner);
11209 if (inner_mode == mode)
11210 return inner;
11211 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11212 x = inner;
11213 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11214 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11216 code = GET_CODE (x);
11217 x = inner;
11220 return gen_rtx_fmt_e (code, mode, x);
11223 /* Load and store depend on the highpart of the address. However,
11224 set_attr_alternative does not give well-defined results before reload,
11225 so we must look at the rtl ourselves to see if any of the feeding
11226 registers is used in a memref.
11228 Return true iff INSN contains a MEM. */
11229 bool
11230 sh_contains_memref_p (rtx insn)
11232 subrtx_iterator::array_type array;
11233 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11234 if (MEM_P (*iter))
11235 return true;
11236 return false;
11239 /* Return true iff INSN loads a banked register. */
11240 bool
11241 sh_loads_bankedreg_p (rtx insn)
11243 if (GET_CODE (PATTERN (insn)) == SET)
11245 rtx op = SET_DEST (PATTERN(insn));
11246 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11247 return true;
11250 return false;
11253 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11254 static reg_class_t
11255 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11257 return rclass;
11260 /* Implement TARGET_SECONDARY_RELOAD. */
11261 static reg_class_t
11262 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11263 machine_mode mode, secondary_reload_info *sri)
11265 enum reg_class rclass = (enum reg_class) rclass_i;
11267 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11268 && REG_P (XEXP (XEXP (x, 0), 0))
11269 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11270 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11272 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11273 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11275 if (REG_P (x) && REGNO (x) == GBR_REG)
11276 return NO_REGS;
11278 if (in_p)
11280 if (REGCLASS_HAS_FP_REG (rclass)
11281 && immediate_operand ((x), mode)
11282 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11283 switch (mode)
11285 case E_SFmode:
11286 sri->icode = CODE_FOR_reload_insf__frn;
11287 return NO_REGS;
11288 case E_DFmode:
11289 sri->icode = CODE_FOR_reload_indf__frn;
11290 return NO_REGS;
11291 case E_SImode:
11292 /* ??? If we knew that we are in the appropriate mode -
11293 single precision - we could use a reload pattern directly. */
11294 return FPUL_REGS;
11295 default:
11296 abort ();
11298 if (rclass == FPUL_REGS
11299 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11300 || REGNO (x) == T_REG))
11301 || GET_CODE (x) == PLUS))
11302 return GENERAL_REGS;
11303 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11305 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11306 return GENERAL_REGS;
11307 else if (mode == SFmode)
11308 return FP_REGS;
11309 sri->icode = CODE_FOR_reload_insi__i_fpul;
11310 return NO_REGS;
11312 if (rclass == FPSCR_REGS
11313 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11314 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11315 return GENERAL_REGS;
11316 } /* end of input-only processing. */
11318 if (((REGCLASS_HAS_FP_REG (rclass)
11319 && (REG_P (x)
11320 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11321 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11322 && TARGET_FMOVD))))
11323 || (REGCLASS_HAS_GENERAL_REG (rclass)
11324 && REG_P (x)
11325 && FP_REGISTER_P (REGNO (x))))
11326 && (mode == SFmode || mode == SImode))
11327 return FPUL_REGS;
11328 if ((rclass == FPUL_REGS
11329 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11330 && (MEM_P (x)
11331 || (REG_P (x)
11332 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11333 || REGNO (x) == T_REG
11334 || system_reg_operand (x, VOIDmode)))))
11336 if (rclass == FPUL_REGS)
11337 return GENERAL_REGS;
11338 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
11341 if ((rclass == MAC_REGS || rclass == PR_REGS)
11342 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11343 && rclass != REGNO_REG_CLASS (REGNO (x)))
11344 return GENERAL_REGS;
11346 /* If here fall back to loading FPUL register through general registers.
11347 This case can happen when movsi_ie insn is picked initially to
11348 load/store the FPUL register from/to another register, and then the
11349 other register is allocated on the stack. */
11350 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11351 return GENERAL_REGS;
11353 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11354 the other operand.
11355 On SH2A could also just leave it alone here, which would result in a
11356 4 byte move insn being generated instead. However, for this to work
11357 the insns must have the appropriate alternatives. */
11358 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11359 && satisfies_constraint_Sdd (x)
11360 && sh_disp_addr_displacement (x)
11361 <= sh_max_mov_insn_displacement (mode, false))
11362 return R0_REGS;
11364 /* When reload is trying to address a QImode or HImode subreg on the stack,
11365 force any subreg byte into R0_REGS, as this is going to become a
11366 displacement address.
11367 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11368 is on the stack, the memref to it might already require a displacement
11369 and that has to be added to the final address. At this point we don't
11370 know the cumulative displacement so we assume the worst case. */
11371 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11372 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11373 return R0_REGS;
11375 return NO_REGS;
11378 /* Return true if SUBST can't safely replace its equivalent during RA. */
11379 static bool
11380 sh_cannot_substitute_mem_equiv_p (rtx)
11382 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11383 uses R0 and may cause spill failure when R0 is already used.
11384 We have to return true for that case at least.
11385 Moreover SH has strong R0 parity and also have not enough numbers of
11386 the hard registers to make the equiv substitution win in the size
11387 and the speed on average working sets. The pseudos produced to
11388 hold the equiv values can't get good hard registers for bad cases
11389 and end up memory save/restore insns which make the code worse. */
11390 return true;
11393 /* Return true if DISP can be legitimized. */
11394 static bool
11395 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
11396 machine_mode mode)
11398 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11399 || (TARGET_SH2E && mode == SFmode))
11400 return false;
11402 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
11403 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11405 *disp = adj.mov_disp;
11406 *offs = adj.offset_adjust;
11407 return true;
11410 return false;
11413 /* Return true if movsf insn should be splited with an additional
11414 register. */
11415 bool
11416 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11418 /* op0 == op1 */
11419 if (rtx_equal_p (op0, op1))
11420 return true;
11421 /* fy, FQ, reg */
11422 if (GET_CODE (op1) == CONST_DOUBLE
11423 && ! satisfies_constraint_G (op1)
11424 && ! satisfies_constraint_H (op1)
11425 && REG_P (op0)
11426 && REG_P (op2))
11427 return true;
11428 /* f, r, y */
11429 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11430 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11431 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11432 return true;
11433 /* r, f, y */
11434 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11435 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11436 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11437 return true;
11439 return false;
11442 static void
11443 sh_conditional_register_usage (void)
11445 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11446 if (! VALID_REGISTER_P (regno))
11447 fixed_regs[regno] = call_used_regs[regno] = 1;
11448 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11449 if (flag_pic)
11451 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11452 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11454 if (TARGET_FDPIC)
11456 fixed_regs[PIC_REG] = 1;
11457 call_used_regs[PIC_REG] = 1;
11458 call_really_used_regs[PIC_REG] = 1;
11460 /* Renesas saves and restores mac registers on call. */
11461 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11463 call_really_used_regs[MACH_REG] = 0;
11464 call_really_used_regs[MACL_REG] = 0;
11467 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11468 if (! fixed_regs[regno] && call_really_used_regs[regno])
11469 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11471 call_really_used_regs[FPSCR_MODES_REG] = 0;
11472 call_really_used_regs[FPSCR_STAT_REG] = 0;
11475 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11477 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11478 static bool
11479 sh_legitimate_constant_p (machine_mode mode, rtx x)
11481 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11483 rtx base, offset;
11484 split_const (x, &base, &offset);
11486 if (GET_CODE (base) == SYMBOL_REF
11487 && !offset_within_block_p (base, INTVAL (offset)))
11488 return false;
11491 if (TARGET_FDPIC
11492 && (SYMBOLIC_CONST_P (x)
11493 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11494 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11495 return false;
11497 return GET_CODE (x) != CONST_DOUBLE
11498 || mode == DFmode || mode == SFmode
11499 || mode == DImode || GET_MODE (x) == VOIDmode;
11502 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11504 static void
11505 sh_init_sync_libfuncs (void)
11507 init_sync_libfuncs (UNITS_PER_WORD);
11510 /* Return true if it is appropriate to emit `ret' instructions in the
11511 body of a function. */
11512 bool
11513 sh_can_use_simple_return_p (void)
11515 if (! reload_completed || frame_pointer_needed)
11516 return false;
11518 /* Moving prologue around does't reduce the size. */
11519 if (optimize_function_for_size_p (cfun))
11520 return false;
11522 /* Finally, allow for pr save. */
11523 HARD_REG_SET live_regs_mask;
11524 int d = calc_live_regs (&live_regs_mask);
11526 if (rounded_frame_size (d) > 4)
11527 return false;
11529 return true;
11532 /*------------------------------------------------------------------------------
11533 Address mode optimization support code
11536 typedef HOST_WIDE_INT disp_t;
11537 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11538 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11539 static const disp_t INVALID_DISP = MAX_DISP;
11541 /* A memory reference which is described by a base register and a
11542 displacement. */
11543 class base_reg_disp
11545 public:
11546 base_reg_disp (rtx br, disp_t d);
11548 bool is_reg (void) const;
11549 bool is_disp (void) const;
11550 rtx reg (void) const;
11551 disp_t disp (void) const;
11553 private:
11554 rtx reg_;
11555 disp_t disp_;
11558 inline
11559 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11560 : reg_ (br), disp_ (d)
11564 inline bool
11565 base_reg_disp::is_reg (void) const
11567 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11570 inline bool
11571 base_reg_disp::is_disp (void) const
11573 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11576 inline rtx
11577 base_reg_disp::reg (void) const
11579 return reg_;
11582 inline disp_t
11583 base_reg_disp::disp (void) const
11585 return disp_;
11588 /* Find the base register and calculate the displacement for a given
11589 address rtx 'x'. */
11590 static base_reg_disp
11591 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11592 rtx base_reg = NULL)
11594 if (REG_P (x))
11596 if (REGNO (x) == GBR_REG)
11597 return base_reg_disp (x, disp);
11599 /* We've reached a hard-reg. This is probably the point where
11600 function args are copied to pseudos. Do not go any further and
11601 stick to the pseudo. If the original mem addr was in a hard reg
11602 from the beginning, it will become the base reg. */
11603 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11604 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11606 /* Find the def of the reg and trace it. If there are more than one
11607 defs and they are not the same, assume it's not safe to proceed. */
11608 rtx_insn* last_i = NULL;
11609 rtx last_set = NULL;
11610 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11611 d = DF_REF_NEXT_REG (d))
11613 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11615 /* Accept multiple defs, as long as they are equal. */
11616 if (last_set == NULL || rtx_equal_p (last_set, set))
11618 last_i = DF_REF_INSN (d);
11619 last_set = set;
11621 else
11623 last_i = NULL;
11624 last_set = NULL;
11625 break;
11629 if (last_set != NULL && last_i != NULL)
11630 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11631 XEXP (last_set, 0));
11633 /* When here, no previous insn was found that sets the reg.
11634 The input reg is already the base reg. */
11635 return base_reg_disp (x, disp);
11638 else if (GET_CODE (x) == PLUS)
11640 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11641 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11643 /* Either left or right val must be a reg.
11644 We don't handle the case of 'reg + reg' here. */
11645 if (left_val.is_reg () && right_val.is_disp ())
11646 return base_reg_disp (left_val.reg (), left_val.disp ()
11647 + right_val.disp () + disp);
11648 else if (right_val.is_reg () && left_val.is_disp ())
11649 return base_reg_disp (right_val.reg (), right_val.disp ()
11650 + left_val.disp () + disp);
11651 else
11652 return base_reg_disp (base_reg, disp);
11655 else if (CONST_INT_P (x))
11656 return base_reg_disp (NULL, disp + INTVAL (x));
11658 /* Didn't find anything useful. */
11659 return base_reg_disp (base_reg, disp);
11662 /* Given an insn and a memory operand, try to find an equivalent GBR
11663 based memory address and return the corresponding new memory address.
11664 Return NULL_RTX if not found. */
11666 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11668 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11669 return NULL_RTX;
11671 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11672 if (side_effects_p (XEXP (mem, 0)))
11673 return NULL_RTX;
11675 /* When not optimizing there might be no dataflow available. */
11676 if (df == NULL)
11677 return NULL_RTX;
11679 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11681 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11683 /* If GBR is marked as call clobbered we bail out if we see a call.
11684 FIXME: Actually should check if this mem refers to the gbr value
11685 before or after the call. If there is a store_gbr preceeding this
11686 mem, it's safe to use GBR for this mem.
11688 If GBR is not marked as call clobbered, but there is some other
11689 def than a call, it's probably a load_gbr upon which we also
11690 bail out to be on the safe side.
11691 FIXME: Should check if we have a use-after-def case, such as
11692 the call case above. */
11693 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11694 d = DF_REF_NEXT_REG (d))
11696 if (CALL_P (DF_REF_INSN (d)))
11698 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
11699 return NULL_RTX;
11700 else
11701 continue;
11703 else
11704 return NULL_RTX;
11707 rtx disp = GEN_INT (gbr_disp.disp ());
11708 if (gbr_displacement (disp, GET_MODE (mem)))
11709 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11712 return NULL_RTX;
11715 /*------------------------------------------------------------------------------
11716 Manual insn combine support code.
11719 /* Return true if the specified insn contains any UNSPECs or
11720 UNSPEC_VOLATILEs. */
11721 static bool
11722 sh_unspec_insn_p (rtx x)
11724 subrtx_iterator::array_type array;
11725 FOR_EACH_SUBRTX (i, array, x, ALL)
11726 if (*i != NULL
11727 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11728 return true;
11730 return false;
11733 /* Return true if the register operands of the specified insn are modified
11734 between the specified from and to insns (exclusive of those two). */
11735 bool
11736 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11737 const rtx_insn* from,
11738 const rtx_insn* to)
11740 /* FIXME: Return true for multiple sets for now. */
11741 rtx s = single_set (operands_insn);
11742 if (s == NULL_RTX)
11743 return true;
11745 subrtx_iterator::array_type array;
11746 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11747 if (*i != NULL &&
11748 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11749 return true;
11751 return false;
11754 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11755 negates the T bit and stores the result in the T bit. */
11756 bool
11757 sh_is_nott_insn (const rtx_insn* i)
11759 return i != NULL && GET_CODE (PATTERN (i)) == SET
11760 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11761 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11765 sh_movt_set_dest (const rtx_insn* i)
11767 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11771 sh_movt_set_dest (const_rtx pat)
11773 return GET_CODE (pat) == SET
11774 && arith_reg_dest (XEXP (pat, 0), SImode)
11775 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11778 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11779 that stores the negated T bit in a register, and return the destination
11780 register rtx, or null. */
11782 sh_movrt_set_dest (const rtx_insn* i)
11784 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11788 sh_movrt_set_dest (const_rtx pat)
11790 /* The negc movrt replacement is inside a parallel. */
11791 if (GET_CODE (pat) == PARALLEL)
11792 pat = XVECEXP (pat, 0, 0);
11794 return GET_CODE (pat) == SET
11795 && arith_reg_dest (XEXP (pat, 0), SImode)
11796 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11800 /* Given an insn and a reg number, tell whether the reg dies or is unused
11801 after the insn. */
11802 bool
11803 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11805 return find_regno_note (i, REG_DEAD, regno) != NULL
11806 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11809 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11810 mark it as being used after the insn. */
11811 void
11812 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11814 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11815 remove_note (i, n);
11816 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11817 remove_note (i, n);
11820 /* Given an insn check if it contains any post/pre inc/dec mem operands and
11821 add the REG_INC notes accordingly.
11822 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
11823 FIXME: This function is currently used by peephole2 patterns because
11824 the peephole2 pass does not preserve REG_INC notes. If the notes
11825 are dropped the following passes will do wrong things. */
11826 rtx_insn*
11827 sh_check_add_incdec_notes (rtx_insn* i)
11829 struct for_each_inc_dec_clb
11831 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11832 rtx dest, rtx src ATTRIBUTE_UNUSED,
11833 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11835 gcc_assert (REG_P (dest));
11837 rtx_insn* i = (rtx_insn*)arg;
11838 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11839 add_reg_note (i, REG_INC, dest);
11841 return 0;
11845 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11846 return i;
11849 /* Given a move insn destiation and a source, make sure that the move source
11850 operand is not a post-inc mem load with the same address reg as the
11851 destination. Returns the modified source operand with the post-inc removed
11852 if necessary. */
11854 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11856 if (!MEM_P (src))
11857 return src;
11859 rtx addr = XEXP (src, 0);
11861 if (GET_CODE (addr) == POST_INC
11862 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11863 return replace_equiv_address (src, XEXP (addr, 0));
11865 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11866 return src;
11869 /* Emit a move insn that is safe to be used in peephole patterns. */
11870 rtx_insn*
11871 sh_peephole_emit_move_insn (rtx dst, rtx src)
11873 return sh_check_add_incdec_notes (
11874 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11877 /* Given an op rtx and an insn, try to find out whether the result of the
11878 specified op consists only of logical operations on T bit stores. */
11879 bool
11880 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11882 if (!logical_operator (op, SImode))
11883 return false;
11885 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11886 int op_is_t_count = 0;
11888 for (int i = 0; i < 2; ++i)
11890 if (t_reg_operand (ops[i], VOIDmode)
11891 || negt_reg_operand (ops[i], VOIDmode))
11892 op_is_t_count++;
11894 else
11896 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
11897 prev_nonnote_insn_bb);
11898 if (op_set.set_src == NULL_RTX)
11899 continue;
11901 if (t_reg_operand (op_set.set_src, VOIDmode)
11902 || negt_reg_operand (op_set.set_src, VOIDmode)
11903 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11904 op_is_t_count++;
11908 return op_is_t_count == 2;
11911 /* Given the operand that is extended in a sign/zero extend insn, and the
11912 insn, try to figure out whether the sign/zero extension can be replaced
11913 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11914 NULL_RTX otherwise. */
11916 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11918 if (REG_P (extended_op))
11919 extended_op = extended_op;
11920 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11921 extended_op = SUBREG_REG (extended_op);
11922 else
11923 return NULL_RTX;
11925 /* Reg moves must be of the same mode. */
11926 if (GET_MODE (extended_op) != SImode)
11927 return NULL_RTX;
11929 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
11930 if (s.set_src == NULL_RTX)
11931 return NULL_RTX;
11933 if (t_reg_operand (s.set_src, VOIDmode)
11934 || negt_reg_operand (s.set_src, VOIDmode))
11935 return extended_op;
11937 /* If the zero extended reg was formed by a logical operation, check the
11938 operands of the logical operation. If both originated from T bit
11939 stores the zero extension can be eliminated. */
11940 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11941 return extended_op;
11943 return NULL_RTX;
11946 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11947 figure out whether it should be converted into a movt-xor sequence in
11948 the movrt_negc splitter.
11949 Returns true if insns have been modified and the splitter has succeeded. */
11950 bool
11951 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11953 /* In cases such as
11954 tst r4,r4
11955 mov #-1,r1
11956 negc r1,r1
11957 tst r4,r4
11958 we can replace the T bit clobbering negc with a movt-xor sequence and
11959 eliminate the redundant comparison.
11960 Because the xor insn depends on register allocation results, allow this
11961 only before reload. */
11962 if (!can_create_pseudo_p ())
11963 return false;
11965 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
11966 prev_nonnote_insn_bb);
11967 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
11968 next_nonnote_insn_bb);
11970 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
11971 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
11972 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11973 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
11974 t_before_negc.insn,
11975 t_after_negc.insn)
11976 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11977 && !sh_unspec_insn_p (t_after_negc.insn)
11978 && !volatile_insn_p (PATTERN (t_after_negc.insn))
11979 && !side_effects_p (PATTERN (t_after_negc.insn))
11980 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
11982 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
11983 set_insn_deleted (t_after_negc.insn);
11984 return true;
11986 else
11987 return false;
11990 /* Given a reg and the current insn, see if the value of the reg originated
11991 from a sign or zero extension and return the discovered information. */
11992 sh_extending_set_of_reg
11993 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
11995 if (reg == NULL)
11996 return sh_extending_set_of_reg (curr_insn);
11998 if (SUBREG_P (reg))
11999 reg = SUBREG_REG (reg);
12001 if (!REG_P (reg))
12002 return sh_extending_set_of_reg (curr_insn);
12004 /* FIXME: Also search the predecessor basic blocks. It seems that checking
12005 only the adjacent predecessor blocks would cover most of the cases.
12006 Also try to look through the first extension that we hit. There are some
12007 cases, where a zero_extend is followed an (implicit) sign_extend, and it
12008 fails to see the sign_extend. */
12009 sh_extending_set_of_reg result =
12010 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
12012 if (result.set_src != NULL)
12014 if (GET_CODE (result.set_src) == SIGN_EXTEND
12015 || GET_CODE (result.set_src) == ZERO_EXTEND)
12017 if (dump_file)
12018 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12019 "explicitly sign/zero extended in insn %d\n",
12020 REGNO (reg), INSN_UID (result.insn));
12021 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
12022 result.ext_code = GET_CODE (result.set_src);
12024 else if (MEM_P (result.set_src)
12025 && (GET_MODE (result.set_src) == QImode
12026 || GET_MODE (result.set_src) == HImode)
12027 && !sh_unspec_insn_p (result.insn))
12029 /* On SH QIHImode memory loads always sign extend. However, in
12030 some cases where it seems that the higher bits are not
12031 interesting, the loads will not be expanded as sign extending
12032 insns, but as QIHImode loads into QIHImode regs. We report that
12033 the reg has been sign extended by the mem load. When it is used
12034 as such, we must convert the mem load into a sign extending insn,
12035 see also sh_extending_set_of_reg::use_as_extended_reg. */
12036 if (dump_file)
12037 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12038 "implicitly sign extended in insn %d\n",
12039 REGNO (reg), INSN_UID (result.insn));
12040 result.from_mode = GET_MODE (result.set_src);
12041 result.ext_code = SIGN_EXTEND;
12045 return result;
12048 /* Given a reg that is known to be sign or zero extended at some insn,
12049 take the appropriate measures so that the extended value can be used as
12050 a reg at the specified insn and return the resulting reg rtx. */
12052 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
12054 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
12055 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
12056 gcc_assert (from_mode == QImode || from_mode == HImode);
12058 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
12060 if (dump_file)
12061 fprintf (dump_file,
12062 "use_as_extended_reg: converting non-extending mem load in "
12063 "insn %d into sign-extending load\n", INSN_UID (insn));
12065 rtx r = gen_reg_rtx (SImode);
12066 rtx_insn* i0;
12067 if (from_mode == QImode)
12068 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
12069 else if (from_mode == HImode)
12070 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
12071 else
12072 gcc_unreachable ();
12074 emit_insn_after (
12075 gen_move_insn (XEXP (set_rtx, 0),
12076 gen_lowpart (GET_MODE (set_src), r)), i0);
12077 set_insn_deleted (insn);
12078 return r;
12080 else
12082 rtx extension_dst = XEXP (set_rtx, 0);
12083 if (GET_MODE (extension_dst) != SImode)
12084 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12085 GET_MODE (extension_dst), 0);
12086 if (modified_between_p (extension_dst, insn, use_at_insn))
12088 if (dump_file)
12089 fprintf (dump_file,
12090 "use_as_extended_reg: dest reg %d of extending insn %d is "
12091 "modified, inserting a reg-reg copy\n",
12092 REGNO (extension_dst), INSN_UID (insn));
12094 rtx r = gen_reg_rtx (SImode);
12095 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12096 return r;
12098 else
12100 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12101 return extension_dst;
12106 bool
12107 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12109 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12110 && (from_mode == QImode || from_mode == HImode)
12111 && set_src != NULL)
12112 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12113 else
12114 return false;
12118 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12120 gcc_assert (can_use_as_unextended_reg ());
12122 rtx r = XEXP (set_src, 0);
12123 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12125 if (modified_between_p (r, insn, use_at_insn))
12127 rtx r1 = gen_reg_rtx (SImode);
12128 emit_insn_after (gen_move_insn (r1, r0), insn);
12129 return r1;
12131 else
12133 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12134 ? REGNO (SUBREG_REG (r))
12135 : REGNO (r));
12136 return r0;
12140 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12141 perform the necessary checks on the operands and split it accordingly. */
12142 void
12143 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12144 int subreg_offset, rtx operands[])
12146 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12148 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12149 curr_insn);
12150 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12151 curr_insn);
12153 /* If one of the operands is known to be zero extended, that's already
12154 sufficient to mask out the unwanted high bits. */
12155 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12157 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12158 operands[1]));
12159 return;
12161 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12163 emit_insn (gen_tstsi_t (operands[0],
12164 eop1.use_as_extended_reg (curr_insn)));
12165 return;
12168 /* None of the operands seem to be zero extended.
12169 If both are sign extended it's OK, too. */
12170 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12171 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12173 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12174 eop1.use_as_extended_reg (curr_insn)));
12175 return;
12178 /* Otherwise we have to insert a zero extension on one of the operands to
12179 mask out the unwanted high bits.
12180 Prefer the operand that has no known extension. */
12181 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12182 std::swap (operands[0], operands[1]);
12184 rtx tmp0 = gen_reg_rtx (SImode);
12185 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12186 GET_MODE (operands[0]), subreg_offset);
12187 emit_insn (subreg_mode == QImode
12188 ? gen_zero_extendqisi2 (tmp0, tmp1)
12189 : gen_zero_extendhisi2 (tmp0, tmp1));
12190 emit_insn (gen_tstsi_t (tmp0, operands[1]));
12193 /* A helper class to increment/decrement a counter variable each time a
12194 function is entered/left. */
12195 class scope_counter
12197 public:
12198 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12200 ~scope_counter (void)
12202 --m_counter;
12203 gcc_assert (m_counter >= 0);
12206 int count (void) const { return m_counter; }
12208 private:
12209 int& m_counter;
12212 /* Given an rtx x, determine whether the expression can be used to create
12213 an insn that calulates x and stores the result in the T bit.
12214 This is used by the 'treg_set_expr' predicate to construct insns sequences
12215 where T bit results are fed into other insns, such as addc, subc, negc
12216 insns.
12218 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12219 distinguish between 'positive' and 'negative' forms. For now this has to
12220 be done in the preparation code. We could also introduce
12221 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12222 two different patterns for the 'postive' and 'negative' forms. However,
12223 the total amount of lines of code seems to be about the same and the
12224 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12225 recog function would need to look inside the expression by temporarily
12226 splitting it. */
12227 static int sh_recog_treg_set_expr_reent_count = 0;
12229 bool
12230 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12232 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12234 /* Limit the recursion count to avoid nested expressions which we can't
12235 resolve to a single treg set insn. */
12236 if (recursion.count () > 1)
12237 return false;
12239 /* Early accept known possible operands before doing recog. */
12240 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12241 || negt_reg_operand (op, mode))
12242 return true;
12244 /* Early reject impossible operands before doing recog.
12245 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12246 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12247 such as lower-subreg will bail out. Some insns such as SH4A movua are
12248 done with UNSPEC, so must reject those, too, or else it would result
12249 in an invalid reg -> treg move. */
12250 if (CONST_INT_P (op) || register_operand (op, mode)
12251 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12252 return false;
12254 if (!can_create_pseudo_p ())
12255 return false;
12257 /* expand_debug_locations may call this to compute rtx costs at
12258 very early stage. In that case, don't make new insns here to
12259 avoid codegen differences with -g. */
12260 if (currently_expanding_to_rtl)
12261 return false;
12263 /* We are going to invoke recog in a re-entrant way and thus
12264 have to capture its current state and restore it afterwards. */
12265 recog_data_d prev_recog_data = recog_data;
12267 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12268 SET_PREV_INSN (i) = NULL;
12269 SET_NEXT_INSN (i) = NULL;
12271 /* If the comparison op doesn't have a result mode, set it to SImode. */
12272 machine_mode prev_op_mode = GET_MODE (op);
12273 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12274 PUT_MODE (op, SImode);
12276 int result = recog (PATTERN (i), i, 0);
12278 /* It seems there is no insn like that. Create a negated version and
12279 try again. If we hit a negated form, we'll allow that and append a
12280 nott sequence when splitting out the insns. Insns that do the split
12281 can then remove the trailing nott if they know how to deal with it. */
12282 if (result < 0 && COMPARISON_P (op))
12284 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12285 if (cmp_mode == VOIDmode)
12286 cmp_mode = GET_MODE (XEXP (op, 1));
12288 rtx_code prev_code = GET_CODE (op);
12289 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12290 result = recog (PATTERN (i), i, 0);
12291 PUT_CODE (op, prev_code);
12294 PUT_MODE (op, prev_op_mode);
12295 recog_data = prev_recog_data;
12296 return result >= 0;
12299 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12300 This can be used as a condition for insn/split patterns to allow certain
12301 T bit setting patters only to be matched as sub expressions of other
12302 patterns. */
12303 bool
12304 sh_in_recog_treg_set_expr (void)
12306 return sh_recog_treg_set_expr_reent_count > 0;
12309 /* Given an rtx x, which is assumed to be some expression that has been
12310 matched by the 'treg_set_expr' predicate before, split and emit the
12311 insns that are necessary to calculate the expression and store the result
12312 in the T bit.
12313 The splitting is done recursively similar to 'try_split' in emit-rt.c.
12314 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12315 'delete_insn' which then causes the DF parts to bail out, because we
12316 currently are inside another gen_split* function and would invoke
12317 'try_split' in a reentrant way. */
12318 static std::pair<rtx_insn*, rtx_insn*>
12319 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12321 if (dump_file)
12323 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12324 print_rtl_single (dump_file, i);
12325 fprintf (dump_file, "\n");
12328 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12330 if (seq == NULL)
12331 return std::make_pair (i, i);
12333 /* Avoid infinite splitter loops if any insn of the result matches
12334 the original pattern. */
12335 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12336 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12337 return std::make_pair (i, i);
12339 unshare_all_rtl_in_chain (seq);
12341 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12342 a linked list, replace the single insn with the new insns. */
12343 rtx_insn* seqlast = seq;
12344 while (NEXT_INSN (seqlast) != NULL)
12345 seqlast = NEXT_INSN (seqlast);
12347 if (rtx_insn* iprev = PREV_INSN (i))
12348 SET_NEXT_INSN (iprev) = seq;
12349 if (rtx_insn* inext = NEXT_INSN (i))
12350 SET_PREV_INSN (inext) = seqlast;
12352 SET_PREV_INSN (seq) = PREV_INSN (i);
12353 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12355 SET_PREV_INSN (i) = NULL;
12356 SET_NEXT_INSN (i) = NULL;
12358 /* Recursively split all insns. */
12359 for (i = seq; ; i = NEXT_INSN (i))
12361 std::pair<rtx_insn*, rtx_insn*> ii =
12362 sh_try_split_insn_simple (i, curr_insn, n + 1);
12363 if (i == seq)
12364 seq = ii.first;
12365 if (i == seqlast)
12367 seqlast = ii.second;
12368 break;
12370 i = ii.first;
12373 return std::make_pair (seq, seqlast);
12376 sh_treg_insns
12377 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12379 if (t_reg_operand (x, VOIDmode))
12380 return sh_treg_insns ();
12382 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12384 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12385 SET_PREV_INSN (i) = NULL;
12386 SET_NEXT_INSN (i) = NULL;
12388 if (dump_file)
12390 fprintf (dump_file, "split_treg_set_expr insn:\n");
12391 print_rtl (dump_file, i);
12392 fprintf (dump_file, "\n");
12395 /* If the insn is not found, we will try a negated form and append
12396 a nott. */
12397 bool append_nott = false;
12399 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12400 have to capture its current state and restore it afterwards. */
12401 recog_data_d prev_recog_data = recog_data;
12403 if (negt_reg_operand (x, GET_MODE (x)))
12405 /* This is a normal movt followed by a nott. It will be converted
12406 into a movrt after initial expansion. */
12407 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12408 append_nott = true;
12410 else
12412 /* If the comparison op doesn't have a mode set, set it to SImode. */
12413 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12414 PUT_MODE (x, SImode);
12416 int insn_code = recog (PATTERN (i), i, 0);
12418 if (insn_code < 0 && COMPARISON_P (x))
12420 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12421 if (cmp_mode == VOIDmode)
12422 cmp_mode = GET_MODE (XEXP (x, 1));
12424 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12425 insn_code = recog (PATTERN (i), i, 0);
12426 append_nott = true;
12429 gcc_assert (insn_code >= 0);
12432 /* Try to recursively split the insn. Some insns might refuse to split
12433 any further while we are in the treg_set_expr splitting phase. They
12434 will be emitted as part of the outer insn and then split again. */
12435 std::pair<rtx_insn*, rtx_insn*> insnlist =
12436 sh_try_split_insn_simple (i, curr_insn);
12438 /* Restore recog state. */
12439 recog_data = prev_recog_data;
12441 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12442 ? insnlist.second
12443 : NULL;
12444 if (dump_file)
12446 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12447 print_rtl (dump_file, insnlist.first);
12448 fprintf (dump_file, "\n");
12450 if (nott_insn != NULL)
12451 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12454 emit_insn (insnlist.first);
12456 if (nott_insn != NULL && append_nott)
12458 if (dump_file)
12459 fprintf (dump_file, "removing trailing nott\n");
12460 remove_insn (nott_insn);
12461 nott_insn = NULL;
12462 append_nott = false;
12465 if (append_nott)
12466 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12468 rtx_insn* first_insn = get_insns ();
12470 if (dump_file)
12472 fprintf (dump_file, "resulting insns:\n");
12473 print_rtl (dump_file, first_insn);
12474 fprintf (dump_file, "\n");
12477 return sh_treg_insns (first_insn, nott_insn);
12480 /*------------------------------------------------------------------------------
12481 Mode switching support code.
12484 static void
12485 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12486 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12488 if ((TARGET_SH4A_FP || TARGET_SH4_300)
12489 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12491 emit_insn (gen_toggle_pr ());
12492 if (TARGET_FMOVD)
12493 emit_insn (gen_toggle_sz ());
12495 else if (mode != FP_MODE_NONE)
12497 rtx tmp = gen_reg_rtx (SImode);
12498 emit_insn (gen_sts_fpscr (tmp));
12499 rtx i = NULL;
12501 const unsigned HOST_WIDE_INT fpbits =
12502 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12504 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12505 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12506 else if (mode == FP_MODE_SINGLE)
12507 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12508 else if (mode == FP_MODE_DOUBLE)
12509 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12510 else
12511 gcc_unreachable ();
12513 emit_insn (i);
12514 emit_insn (gen_lds_fpscr (tmp));
12518 static int
12519 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12521 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12524 static int
12525 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12527 if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
12528 get_attr_fp_set (insn) != FP_SET_NONE)
12529 return (int) get_attr_fp_set (insn);
12530 else
12531 return mode;
12534 static int
12535 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12537 return NORMAL_MODE (entity);
12540 static int
12541 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12543 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12546 static int
12547 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12549 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12552 /*------------------------------------------------------------------------------
12553 Misc
12556 /* Return true if we use LRA instead of reload pass. */
12557 bool
12558 sh_lra_p (void)
12560 return sh_lra_flag;
12563 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12565 static bool
12566 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12567 unsigned int align,
12568 enum by_pieces_operation op,
12569 bool speed_p)
12571 switch (op)
12573 case MOVE_BY_PIECES:
12574 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12575 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12576 case STORE_BY_PIECES:
12577 case SET_BY_PIECES:
12578 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12579 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12580 default:
12581 return default_use_by_pieces_infrastructure_p (size, align,
12582 op, speed_p);
12586 bool
12587 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12588 rtx x ATTRIBUTE_UNUSED)
12590 return TARGET_FDPIC;
12593 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12594 function descriptor) into r1 and the GOT address into r12,
12595 returning an rtx for r1. */
12598 sh_load_function_descriptor (rtx funcdesc)
12600 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12601 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12602 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12603 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12605 emit_move_insn (r1, fnaddr);
12606 /* The ABI requires the entry point address to be loaded first, so
12607 prevent the load from being moved after that of the GOT
12608 address. */
12609 emit_insn (gen_blockage ());
12610 emit_move_insn (pic_reg, gotaddr);
12611 return r1;
12614 /* Return an rtx holding the initial value of the FDPIC register (the
12615 FDPIC pointer passed in from the caller). */
12618 sh_get_fdpic_reg_initial_val (void)
12620 return get_hard_reg_initial_val (Pmode, PIC_REG);
12623 #include "gt-sh.h"